Spaces:

taboola-cz
/

sel-chat-coach

Running

App Files Community

tblaisaacliao committed on Dec 25, 2025

Commit

710cde6

1 Parent(s): 4deab9b

support evaluating coach direct prompt

Browse files

Files changed (4) hide show

src/app/admin/evaluations/[id]/page.tsx +63 -14
src/lib/repositories/evaluation-repository.ts +1 -0
src/lib/services/evaluation-service.ts +125 -12
src/lib/types/models.ts +1 -0

src/app/admin/evaluations/[id]/page.tsx CHANGED Viewed

@@ -191,8 +191,10 @@ export default function EvaluationDetailPage({
       <div className="bg-white rounded-lg shadow p-4 mb-6">
         <div className="grid grid-cols-2 md:grid-cols-4 gap-4 text-sm">
           <div>
-            <span className="text-gray-500">Student Prompt:</span>
-            <p className="font-medium text-gray-900">{evaluation.studentPromptId}</p>
           </div>
           <div>
             <span className="text-gray-500">Model Used:</span>
@@ -214,9 +216,26 @@ export default function EvaluationDetailPage({
       {/* Score Sections */}
       <div className="grid grid-cols-1 lg:grid-cols-3 gap-6 mb-6">
         <ScoreSection
-          title="Prompt Design"
           overall={evaluation.scores.promptDesign.overall}
-          scores={[
             {
               label: 'Clarity',
               value: evaluation.scores.promptDesign.clarity,
@@ -238,9 +257,26 @@ export default function EvaluationDetailPage({
         />
         <ScoreSection
-          title="Training Effectiveness"
           overall={evaluation.scores.trainingEffectiveness.overall}
-          scores={[
             {
               label: 'Challenge Level',
               value: evaluation.scores.trainingEffectiveness.challengeLevel,
@@ -262,9 +298,22 @@ export default function EvaluationDetailPage({
         />
         <ScoreSection
-          title="Conversation Quality"
           overall={evaluation.scores.conversationQuality.overall}
-          scores={[
             {
               label: 'Teacher Insights',
               value: evaluation.scores.conversationQuality.teacherInsights,
@@ -301,7 +350,7 @@ export default function EvaluationDetailPage({
         {/* Strengths */}
         <div className="bg-white rounded-lg shadow p-6">
           <h3 className="text-lg font-semibold text-gray-900 mb-4 flex items-center gap-2">
-            <span className="text-green-500">✓</span> Prompt Strengths
           </h3>
           {evaluation.feedback.strengths.length > 0 ? (
             <ul className="space-y-2">
@@ -313,14 +362,14 @@ export default function EvaluationDetailPage({
               ))}
             </ul>
           ) : (
-            <p className="text-gray-500 text-sm">No strengths identified</p>
           )}
         </div>
         {/* Improvement Areas */}
         <div className="bg-white rounded-lg shadow p-6">
           <h3 className="text-lg font-semibold text-gray-900 mb-4 flex items-center gap-2">
-            <span className="text-yellow-500">!</span> Prompt Improvements
           </h3>
           {evaluation.feedback.improvementAreas.length > 0 ? (
             <ul className="space-y-2">
@@ -332,7 +381,7 @@ export default function EvaluationDetailPage({
               ))}
             </ul>
           ) : (
-            <p className="text-gray-500 text-sm">No improvement areas identified</p>
           )}
         </div>
       </div>
@@ -340,7 +389,7 @@ export default function EvaluationDetailPage({
       {/* Prompt Suggestions */}
       <div className="bg-blue-50 border border-blue-200 rounded-lg p-6 mb-6">
         <h3 className="text-lg font-semibold text-blue-900 mb-4 flex items-center gap-2">
-          <span>💡</span> Prompt Improvement Suggestions
         </h3>
         {evaluation.feedback.promptSuggestions.length > 0 ? (
           <ul className="space-y-2">
@@ -352,7 +401,7 @@ export default function EvaluationDetailPage({
             ))}
           </ul>
         ) : (
-          <p className="text-blue-700 text-sm">No specific suggestions</p>
         )}
       </div>

       <div className="bg-white rounded-lg shadow p-4 mb-6">
         <div className="grid grid-cols-2 md:grid-cols-4 gap-4 text-sm">
           <div>
+            <span className="text-gray-500">對話類型:</span>
+            <p className="font-medium text-gray-900">
+              {evaluation.evaluationMode === 'coach_direct' ? '教練直接對話' : `學生對話 (${evaluation.studentPromptId})`}
+            </p>
           </div>
           <div>
             <span className="text-gray-500">Model Used:</span>
       {/* Score Sections */}
       <div className="grid grid-cols-1 lg:grid-cols-3 gap-6 mb-6">
         <ScoreSection
+          title={evaluation.evaluationMode === 'coach_direct' ? '教練指導品質' : 'Prompt Design'}
           overall={evaluation.scores.promptDesign.overall}
+          scores={evaluation.evaluationMode === 'coach_direct' ? [
+            {
+              label: '指導清晰度',
+              value: evaluation.scores.promptDesign.clarity,
+            },
+            {
+              label: '回應完整度',
+              value: evaluation.scores.promptDesign.completeness,
+            },
+            {
+              label: '建議具體性',
+              value: evaluation.scores.promptDesign.specificity,
+            },
+            {
+              label: '風格一致性',
+              value: evaluation.scores.promptDesign.consistency,
+            },
+          ] : [
             {
               label: 'Clarity',
               value: evaluation.scores.promptDesign.clarity,
         />
         <ScoreSection
+          title={evaluation.evaluationMode === 'coach_direct' ? '教師學習成效' : 'Training Effectiveness'}
           overall={evaluation.scores.trainingEffectiveness.overall}
+          scores={evaluation.evaluationMode === 'coach_direct' ? [
+            {
+              label: '洞察深度',
+              value: evaluation.scores.trainingEffectiveness.challengeLevel,
+            },
+            {
+              label: '學習機會',
+              value: evaluation.scores.trainingEffectiveness.learningOpportunities,
+            },
+            {
+              label: '實用價值',
+              value: evaluation.scores.trainingEffectiveness.realisticScenarios,
+            },
+            {
+              label: '互動深度',
+              value: evaluation.scores.trainingEffectiveness.engagementDepth,
+            },
+          ] : [
             {
               label: 'Challenge Level',
               value: evaluation.scores.trainingEffectiveness.challengeLevel,
         />
         <ScoreSection
+          title={evaluation.evaluationMode === 'coach_direct' ? '對話品質' : 'Conversation Quality'}
           overall={evaluation.scores.conversationQuality.overall}
+          scores={evaluation.evaluationMode === 'coach_direct' ? [
+            {
+              label: '教師收穫',
+              value: evaluation.scores.conversationQuality.teacherInsights,
+            },
+            {
+              label: '對話深度',
+              value: evaluation.scores.conversationQuality.interactionDepth,
+            },
+            {
+              label: '教育價值',
+              value: evaluation.scores.conversationQuality.educationalValue,
+            },
+          ] : [
             {
               label: 'Teacher Insights',
               value: evaluation.scores.conversationQuality.teacherInsights,
         {/* Strengths */}
         <div className="bg-white rounded-lg shadow p-6">
           <h3 className="text-lg font-semibold text-gray-900 mb-4 flex items-center gap-2">
+            <span className="text-green-500">✓</span> {evaluation.evaluationMode === 'coach_direct' ? '教練優點' : 'Prompt Strengths'}
           </h3>
           {evaluation.feedback.strengths.length > 0 ? (
             <ul className="space-y-2">
               ))}
             </ul>
           ) : (
+            <p className="text-gray-500 text-sm">{evaluation.evaluationMode === 'coach_direct' ? '無明確優點' : 'No strengths identified'}</p>
           )}
         </div>
         {/* Improvement Areas */}
         <div className="bg-white rounded-lg shadow p-6">
           <h3 className="text-lg font-semibold text-gray-900 mb-4 flex items-center gap-2">
+            <span className="text-yellow-500">!</span> {evaluation.evaluationMode === 'coach_direct' ? '教練改進空間' : 'Prompt Improvements'}
           </h3>
           {evaluation.feedback.improvementAreas.length > 0 ? (
             <ul className="space-y-2">
               ))}
             </ul>
           ) : (
+            <p className="text-gray-500 text-sm">{evaluation.evaluationMode === 'coach_direct' ? '無明確改進建議' : 'No improvement areas identified'}</p>
           )}
         </div>
       </div>
       {/* Prompt Suggestions */}
       <div className="bg-blue-50 border border-blue-200 rounded-lg p-6 mb-6">
         <h3 className="text-lg font-semibold text-blue-900 mb-4 flex items-center gap-2">
+          <span>💡</span> {evaluation.evaluationMode === 'coach_direct' ? '教練提示改進建議' : 'Prompt Improvement Suggestions'}
         </h3>
         {evaluation.feedback.promptSuggestions.length > 0 ? (
           <ul className="space-y-2">
             ))}
           </ul>
         ) : (
+          <p className="text-blue-700 text-sm">{evaluation.evaluationMode === 'coach_direct' ? '無具體建議' : 'No specific suggestions'}</p>
         )}
       </div>

src/lib/repositories/evaluation-repository.ts CHANGED Viewed

@@ -30,6 +30,7 @@ export class EvaluationRepository {
       conversationId: row.conversationId,
       studentPromptId: row.studentPromptId || undefined,
       evaluationType: row.evaluationType as Evaluation['evaluationType'],
       modelUsed: row.modelUsed,
       evaluatedAt: row.evaluatedAt,
       evaluatedBy: row.evaluatedBy || undefined,

       conversationId: row.conversationId,
       studentPromptId: row.studentPromptId || undefined,
       evaluationType: row.evaluationType as Evaluation['evaluationType'],
+      evaluationMode: row.studentPromptId === 'coach_direct' ? 'coach_direct' : 'student',
       modelUsed: row.modelUsed,
       evaluatedAt: row.evaluatedAt,
       evaluatedBy: row.evaluatedBy || undefined,

src/lib/services/evaluation-service.ts CHANGED Viewed

@@ -18,6 +18,93 @@ const openai = createOpenAI({
 const EVALUATION_MODEL = process.env.MODEL_NAME || 'gpt-4o-mini';
 const EVALUATION_SYSTEM_PROMPT = `你是一位專業的社會情緒學習（SEL）教師培訓應用程式評估專家。
 你的目標是評估**教師在對話中的體驗**，以便提示工程師改進學生提示。
@@ -143,17 +230,25 @@ export class EvaluationService {
   private formatConversationForEvaluation(
     conversation: Conversation,
-    messages: Message[]
   ): string {
     const formattedMessages = messages
       .filter((m) => m.role !== 'system')
       .map((m) => {
-        const speaker =
-          m.role === 'user'
-            ? '老師'
-            : m.speaker === 'student'
-              ? '學生'
-              : '教練';
         return `${speaker}: ${m.content}`;
       })
       .join('\n');
@@ -221,6 +316,9 @@ export class EvaluationService {
       }
     }
     // Get the system prompt from the conversation
     const systemPrompt =
       conversation.systemPrompt || '(No system prompt available)';
@@ -228,11 +326,21 @@ export class EvaluationService {
     // Format conversation for evaluation
     const formattedConversation = this.formatConversationForEvaluation(
       conversation,
-      messages
     );
-    // Build the user prompt
-    const userPrompt = `
 學生系統提示：
 ${systemPrompt}
@@ -241,15 +349,20 @@ ${formattedConversation}
 請評估此對話並以 JSON 格式返回你的評估結果。`;
     console.log(
-      `[Evaluation] Evaluating conversation ${conversationId} with ${messages.length} messages`
     );
     try {
       // Call the AI model
       const { text } = await generateText({
         model: openai(EVALUATION_MODEL),
-        system: EVALUATION_SYSTEM_PROMPT,
         prompt: userPrompt,
         temperature: 0.3, // Lower temperature for more consistent results
       });

 const EVALUATION_MODEL = process.env.MODEL_NAME || 'gpt-4o-mini';
+// Evaluation prompt for coach-direct conversations (no student involved)
+const COACH_DIRECT_EVALUATION_SYSTEM_PROMPT = `你是一位專業的社會情緒學習（SEL）教練輔導評估專家。
+你的目標是評估**教練與教師之間的直接對話品質**，以便改進教練系統的輔導效能。
+你將會收到：
+1. 教練系統提示 - 定義教練角色的提示文字
+2. 教師與 AI 教練之間的對話
+**重要說明：這是直接的教練-教師對話，沒有學生角色參與。**
+**重要評估原則：**
+- 以教師體驗為核心：教師在這次諮詢後感覺如何？有獲得幫助嗎？
+- 即使教練提示設計良好，如果對話品質差，整體評分也應該低
+- 如果教師輸入無意義內容（如隨機數字、亂碼），在 rationale 中註明並給予低分
+請評估以下維度（評分 1-5 分，1=差，5=優秀）：
+1. 教練指導品質（權重 50%）
+   - 指導清晰度：教練的建議是否清楚且可執行？
+   - 回應完整度：教練是否充分回應教師的疑問和需求？
+   - 建議具體性：教練是否提供具體、實用的建議？
+   - 風格一致性：教練是否保持支持性、一致的溝通風格？
+2. 教師學習成效（權重 50%）
+   - 洞察深度：教師是否獲得有意義的洞察？
+   - 學習機會：對話是否創造了學習和成長的機會？
+   - 實用價值：教練的建議是否能應用於實際教學情境？
+   - 互動深度：教師是否有實質性地參與對話？
+3. 對話品質
+   - 教師收穫：教師是否從對話中有所收穫？
+   - 對話深度：對話是否有實質性內容？
+   - 教育價值：對話是否具有教育價值？
+**整體評分計算：**
+overallScore = 0.5 × coachingQuality.overall + 0.5 × teacherLearning.overall
+**低分情況（應給 1-2 分）：**
+- 教師輸入無意義內容（隨機數字、單字、亂碼）
+- 對話過短或缺乏實質互動
+- 教師明顯沒有認真參與
+請以繁體中文回應，並返回以下 JSON 結構：
+{
+  "teacherEngagement": {
+    "level": "<high|medium|low|none - 教師參與程度>",
+    "warning": "<如果 level 是 low 或 none，說明為什麼教師沒有認真參與，否則為空字串>"
+  },
+  "promptDesign": {
+    "clarity": <數字 1-5 - 指導清晰度>,
+    "completeness": <數字 1-5 - 回應完整度>,
+    "specificity": <數字 1-5 - 建議具體性>,
+    "consistency": <數字 1-5 - 風格一致性>,
+    "overall": <數字 1-5>,
+    "rationale": "<教練指導品質說明>"
+  },
+  "trainingEffectiveness": {
+    "challengeLevel": <數字 1-5 - 洞察深度>,
+    "learningOpportunities": <數字 1-5 - 學習機會>,
+    "realisticScenarios": <數字 1-5 - 實用價值>,
+    "engagementDepth": <數字 1-5 - 互動深度>,
+    "overall": <數字 1-5>,
+    "rationale": "<教師學習成效說明>"
+  },
+  "conversationQuality": {
+    "teacherInsights": <數字 1-5 - 教師收穫>,
+    "interactionDepth": <數字 1-5 - 對話深度>,
+    "educationalValue": <數字 1-5 - 教育價值>,
+    "overall": <數字 1-5>,
+    "rationale": "<對話品質說明>"
+  },
+  "overallScore": <數字 1-5，按權重計算>,
+  "strengths": ["<教練的優點>"],
+  "improvementAreas": ["<教練需要改進的地方>"],
+  "promptSuggestions": ["<具體的教練提示修改建議>"]
+}
+**teacherEngagement.level 判斷標準：**
+- "high": 教師積極參與，提出有意義的問題和回應
+- "medium": 教師有參與但互動較淺
+- "low": 教師參與度低，回應簡短或缺乏深度
+- "none": 教師輸入無意義內容（隨機數字、亂碼、測試文字）
+只返回有效的 JSON，不要其他文字。`;
+// Evaluation prompt for student conversations (with simulated student)
 const EVALUATION_SYSTEM_PROMPT = `你是一位專業的社會情緒學習（SEL）教師培訓應用程式評估專家。
 你的目標是評估**教師在對話中的體驗**，以便提示工程師改進學生提示。
   private formatConversationForEvaluation(
     conversation: Conversation,
+    messages: Message[],
+    isCoachDirect: boolean = false
   ): string {
     const formattedMessages = messages
       .filter((m) => m.role !== 'system')
       .map((m) => {
+        let speaker: string;
+        if (isCoachDirect) {
+          // For coach-direct: only 老師 and 教練
+          speaker = m.role === 'user' ? '老師' : '教練';
+        } else {
+          // For student conversations: 老師, 學生, 教練
+          speaker =
+            m.role === 'user'
+              ? '老師'
+              : m.speaker === 'student'
+                ? '學生'
+                : '教練';
+        }
         return `${speaker}: ${m.content}`;
       })
       .join('\n');
       }
     }
+    // Detect conversation type
+    const isCoachDirect = conversation.studentPromptId === 'coach_direct';
     // Get the system prompt from the conversation
     const systemPrompt =
       conversation.systemPrompt || '(No system prompt available)';
     // Format conversation for evaluation
     const formattedConversation = this.formatConversationForEvaluation(
       conversation,
+      messages,
+      isCoachDirect
     );
+    // Build the user prompt based on conversation type
+    const userPrompt = isCoachDirect
+      ? `
+教練系統提示：
+${systemPrompt}
+對話內容（共 ${messages.length} 則訊息）：
+${formattedConversation}
+請評估此教練-教師對話並以 JSON 格式返回你的評估結果。`
+      : `
 學生系統提示：
 ${systemPrompt}
 請評估此對話並以 JSON 格式返回你的評估結果。`;
+    // Select the appropriate evaluation system prompt
+    const evaluationSystemPrompt = isCoachDirect
+      ? COACH_DIRECT_EVALUATION_SYSTEM_PROMPT
+      : EVALUATION_SYSTEM_PROMPT;
     console.log(
+      `[Evaluation] Evaluating conversation ${conversationId} with ${messages.length} messages (mode: ${isCoachDirect ? 'coach_direct' : 'student'})`
     );
     try {
       // Call the AI model
       const { text } = await generateText({
         model: openai(EVALUATION_MODEL),
+        system: evaluationSystemPrompt,
         prompt: userPrompt,
         temperature: 0.3, // Lower temperature for more consistent results
       });

src/lib/types/models.ts CHANGED Viewed

@@ -126,6 +126,7 @@ export interface Evaluation {
   conversationId: string;
   studentPromptId?: string;
   evaluationType: EvaluationType;
   modelUsed: string;
   evaluatedAt: string;
   evaluatedBy?: string;

   conversationId: string;
   studentPromptId?: string;
   evaluationType: EvaluationType;
+  evaluationMode?: 'student' | 'coach_direct';
   modelUsed: string;
   evaluatedAt: string;
   evaluatedBy?: string;