Basitha committed on
Commit
5e03f32
·
verified ·
1 Parent(s): c563a74

Update common/ResponseValidation.py

Browse files
Files changed (1) hide show
  1. common/ResponseValidation.py +34 -4
common/ResponseValidation.py CHANGED
@@ -138,7 +138,7 @@ Please provide a concise reason why the style does not match.
138
 
139
 
140
 
141
- def validate_response(question, answer, user_profile_str, fast_facts_str, interview_transcript_text, respondent_type, ai_evaluator_agent, processor_llm):
142
  llm_mode_prompt = f"""
143
  You are an expert in market research interview analysis. Given the following question, determine if it is:
144
  - Exploratory: subjective, open-ended, opinion-based, or reflective (e.g., feelings, motivations, preferences, aspirations, values, beliefs, etc.)
@@ -175,11 +175,16 @@ Please rate the answer on a scale of 0–10 for:
175
  Output strictly in this format:
176
  Plausibility Rating: <0-10>
177
  Relevance Rating: <0-10>
 
 
 
178
  """
179
  eval_response = processor_llm.invoke(eval_prompt)
180
  eval_text = eval_response.content.strip()
181
  plausibility = None
182
  relevance = None
 
 
183
  for line in eval_text.split("\n"):
184
  if line.lower().startswith("plausibility rating:"):
185
  try:
@@ -191,9 +196,23 @@ Relevance Rating: <0-10>
191
  relevance = float(line.split(":", 1)[1].strip())
192
  except Exception as e:
193
  logging.error(f"Error parsing relevance rating: {e}")
 
 
 
 
194
  logging.info(f"Exploratory evaluation: plausibility={plausibility}, relevance={relevance}")
195
  if plausibility is not None and relevance is not None:
196
- return plausibility >= 8.0 and relevance >= 8.0
 
 
 
 
 
 
 
 
 
 
197
  return False
198
 
199
  else:
@@ -210,18 +229,29 @@ Please rate the answer on a scale of 0–10 for:
210
  1. Accuracy (how well the answer matches the facts in the profile, transcript, or fast facts; penalise any unsupported or fabricated content)
211
  Output strictly in this format:
212
  Accuracy Rating: <0-10>
 
 
213
  """
214
  eval_response = processor_llm.invoke(eval_prompt)
215
  eval_text = eval_response.content.strip()
216
  accuracy = None
 
217
  for line in eval_text.split("\n"):
218
  if line.lower().startswith("accuracy rating:"):
219
  try:
220
  accuracy = float(line.split(":", 1)[1].strip())
221
  except Exception as e:
222
  logging.error(f"Error parsing accuracy rating: {e}")
 
 
223
  logging.info(f"Fact-based evaluation: accuracy={accuracy}")
224
  if accuracy is not None:
225
- return accuracy >= 8.0
 
 
 
 
 
 
 
226
  return False
227
-
 
138
 
139
 
140
 
141
+ def validate_response(question, answer, user_profile_str, fast_facts_str, interview_transcript_text, respondent_type, ai_evaluator_agent, processor_llm, return_explanation=False):
142
  llm_mode_prompt = f"""
143
  You are an expert in market research interview analysis. Given the following question, determine if it is:
144
  - Exploratory: subjective, open-ended, opinion-based, or reflective (e.g., feelings, motivations, preferences, aspirations, values, beliefs, etc.)
 
175
  Output strictly in this format:
176
  Plausibility Rating: <0-10>
177
  Relevance Rating: <0-10>
178
+ If either rating is less than 8, provide a short reason for each below:
179
+ Plausibility Reason: <reason>
180
+ Relevance Reason: <reason>
181
  """
182
  eval_response = processor_llm.invoke(eval_prompt)
183
  eval_text = eval_response.content.strip()
184
  plausibility = None
185
  relevance = None
186
+ plaus_reason = None
187
+ relev_reason = None
188
  for line in eval_text.split("\n"):
189
  if line.lower().startswith("plausibility rating:"):
190
  try:
 
196
  relevance = float(line.split(":", 1)[1].strip())
197
  except Exception as e:
198
  logging.error(f"Error parsing relevance rating: {e}")
199
+ if line.lower().startswith("plausibility reason:"):
200
+ plaus_reason = line.split(":", 1)[1].strip()
201
+ if line.lower().startswith("relevance reason:"):
202
+ relev_reason = line.split(":", 1)[1].strip()
203
  logging.info(f"Exploratory evaluation: plausibility={plausibility}, relevance={relevance}")
204
  if plausibility is not None and relevance is not None:
205
+ valid = plausibility >= 8.0 and relevance >= 8.0
206
+ if return_explanation:
207
+ feedback = []
208
+ if plausibility < 8.0 and plaus_reason:
209
+ feedback.append(f"Plausibility: {plaus_reason}")
210
+ if relevance < 8.0 and relev_reason:
211
+ feedback.append(f"Relevance: {relev_reason}")
212
+ return valid, "; ".join(feedback) if feedback else None
213
+ return valid
214
+ if return_explanation:
215
+ return False, "Could not parse plausibility/relevance ratings."
216
  return False
217
 
218
  else:
 
229
  1. Accuracy (how well the answer matches the facts in the profile, transcript, or fast facts; penalise any unsupported or fabricated content)
230
  Output strictly in this format:
231
  Accuracy Rating: <0-10>
232
+ If the rating is less than 8, provide a short reason below:
233
+ Accuracy Reason: <reason>
234
  """
235
  eval_response = processor_llm.invoke(eval_prompt)
236
  eval_text = eval_response.content.strip()
237
  accuracy = None
238
+ accuracy_reason = None
239
  for line in eval_text.split("\n"):
240
  if line.lower().startswith("accuracy rating:"):
241
  try:
242
  accuracy = float(line.split(":", 1)[1].strip())
243
  except Exception as e:
244
  logging.error(f"Error parsing accuracy rating: {e}")
245
+ if line.lower().startswith("accuracy reason:"):
246
+ accuracy_reason = line.split(":", 1)[1].strip()
247
  logging.info(f"Fact-based evaluation: accuracy={accuracy}")
248
  if accuracy is not None:
249
+ valid = accuracy >= 8.0
250
+ if return_explanation:
251
+ if not valid and accuracy_reason:
252
+ return False, accuracy_reason
253
+ return valid, None
254
+ return valid
255
+ if return_explanation:
256
+ return False, "Could not parse accuracy rating."
257
  return False