Spaces:

nat232
/

student_sample_panel

Build error

App Files Community

Basitha committed on Jul 3, 2025

Commit

5e03f32

verified ·

1 Parent(s): c563a74

Update common/ResponseValidation.py

Browse files

Files changed (1) hide show

common/ResponseValidation.py +34 -4

common/ResponseValidation.py CHANGED Viewed

@@ -138,7 +138,7 @@ Please provide a concise reason why the style does not match.
-def validate_response(question, answer, user_profile_str, fast_facts_str, interview_transcript_text, respondent_type, ai_evaluator_agent, processor_llm):
     llm_mode_prompt = f"""
 You are an expert in market research interview analysis. Given the following question, determine if it is:
 - Exploratory: subjective, open-ended, opinion-based, or reflective (e.g., feelings, motivations, preferences, aspirations, values, beliefs, etc.)
@@ -175,11 +175,16 @@ Please rate the answer on a scale of 0–10 for:
 Output strictly in this format:
 Plausibility Rating: <0-10>
 Relevance Rating: <0-10>
 """
         eval_response = processor_llm.invoke(eval_prompt)
         eval_text = eval_response.content.strip()
         plausibility = None
         relevance = None
         for line in eval_text.split("\n"):
             if line.lower().startswith("plausibility rating:"):
                 try:
@@ -191,9 +196,23 @@ Relevance Rating: <0-10>
                     relevance = float(line.split(":", 1)[1].strip())
                 except Exception as e:
                     logging.error(f"Error parsing relevance rating: {e}")
         logging.info(f"Exploratory evaluation: plausibility={plausibility}, relevance={relevance}")
         if plausibility is not None and relevance is not None:
-            return plausibility >= 8.0 and relevance >= 8.0
         return False
     else:
@@ -210,18 +229,29 @@ Please rate the answer on a scale of 0–10 for:
 1. Accuracy (how well the answer matches the facts in the profile, transcript, or fast facts; penalise any unsupported or fabricated content)
 Output strictly in this format:
 Accuracy Rating: <0-10>
 """
         eval_response = processor_llm.invoke(eval_prompt)
         eval_text = eval_response.content.strip()
         accuracy = None
         for line in eval_text.split("\n"):
             if line.lower().startswith("accuracy rating:"):
                 try:
                     accuracy = float(line.split(":", 1)[1].strip())
                 except Exception as e:
                     logging.error(f"Error parsing accuracy rating: {e}")
         logging.info(f"Fact-based evaluation: accuracy={accuracy}")
         if accuracy is not None:
-            return accuracy >= 8.0
         return False

+def validate_response(question, answer, user_profile_str, fast_facts_str, interview_transcript_text, respondent_type, ai_evaluator_agent, processor_llm, return_explanation=False):
     llm_mode_prompt = f"""
 You are an expert in market research interview analysis. Given the following question, determine if it is:
 - Exploratory: subjective, open-ended, opinion-based, or reflective (e.g., feelings, motivations, preferences, aspirations, values, beliefs, etc.)
 Output strictly in this format:
 Plausibility Rating: <0-10>
 Relevance Rating: <0-10>
+If either rating is less than 8, provide a short reason for each below:
+Plausibility Reason: <reason>
+Relevance Reason: <reason>
 """
         eval_response = processor_llm.invoke(eval_prompt)
         eval_text = eval_response.content.strip()
         plausibility = None
         relevance = None
+        plaus_reason = None
+        relev_reason = None
         for line in eval_text.split("\n"):
             if line.lower().startswith("plausibility rating:"):
                 try:
                     relevance = float(line.split(":", 1)[1].strip())
                 except Exception as e:
                     logging.error(f"Error parsing relevance rating: {e}")
+            if line.lower().startswith("plausibility reason:"):
+                plaus_reason = line.split(":", 1)[1].strip()
+            if line.lower().startswith("relevance reason:"):
+                relev_reason = line.split(":", 1)[1].strip()
         logging.info(f"Exploratory evaluation: plausibility={plausibility}, relevance={relevance}")
         if plausibility is not None and relevance is not None:
+            valid = plausibility >= 8.0 and relevance >= 8.0
+            if return_explanation:
+                feedback = []
+                if plausibility < 8.0 and plaus_reason:
+                    feedback.append(f"Plausibility: {plaus_reason}")
+                if relevance < 8.0 and relev_reason:
+                    feedback.append(f"Relevance: {relev_reason}")
+                return valid, "; ".join(feedback) if feedback else None
+            return valid
+        if return_explanation:
+            return False, "Could not parse plausibility/relevance ratings."
         return False
     else:
 1. Accuracy (how well the answer matches the facts in the profile, transcript, or fast facts; penalise any unsupported or fabricated content)
 Output strictly in this format:
 Accuracy Rating: <0-10>
+If the rating is less than 8, provide a short reason below:
+Accuracy Reason: <reason>
 """
         eval_response = processor_llm.invoke(eval_prompt)
         eval_text = eval_response.content.strip()
         accuracy = None
+        accuracy_reason = None
         for line in eval_text.split("\n"):
             if line.lower().startswith("accuracy rating:"):
                 try:
                     accuracy = float(line.split(":", 1)[1].strip())
                 except Exception as e:
                     logging.error(f"Error parsing accuracy rating: {e}")
+            if line.lower().startswith("accuracy reason:"):
+                accuracy_reason = line.split(":", 1)[1].strip()
         logging.info(f"Fact-based evaluation: accuracy={accuracy}")
         if accuracy is not None:
+            valid = accuracy >= 8.0
+            if return_explanation:
+                if not valid and accuracy_reason:
+                    return False, accuracy_reason
+                return valid, None
+            return valid
+        if return_explanation:
+            return False, "Could not parse accuracy rating."
         return False