scormon-predata-ai committed on
Commit
7e4b656
·
verified ·
1 Parent(s): a8669ab

Update common/ResponseValidation.py

Browse files
Files changed (1) hide show
  1. common/ResponseValidation.py +78 -80
common/ResponseValidation.py CHANGED
@@ -5,13 +5,7 @@ import re
5
  from RespondentAgent import *
6
  from langchain_groq import ChatGroq
7
 
8
-
9
- def matches_user_speaking_style(answer, processor_llm, user_profile, agent_question, return_explanation=False):
10
- """
11
- Uses the LLM to determine if the answer matches the expected tone and style
12
- based on the user's communication profile.
13
- Returns (True, None) if it is first-person and stylistically aligned, (False, explanation) otherwise if return_explanation=True.
14
- """
15
  logging.info("[Style Match Check] Entry")
16
 
17
  try:
@@ -23,46 +17,71 @@ def matches_user_speaking_style(answer, processor_llm, user_profile, agent_quest
23
  lower_q = agent_question.strip().lower()
24
  is_factual = any(kw in lower_q for kw in factual_keywords)
25
  if is_factual:
26
- logging.info("[Style Match Check] Question is factual — skipping strict first-person enforcement")
27
  if return_explanation:
28
  return True, None
29
  return True
30
 
31
- # --- Step 2: Context-sensitive first-person check ---
32
- fp_prompt = f"""
33
- You are an expert in analysing writing style and narrative perspective.
34
- Determine whether the following response is *stylistically appropriate* and matches a first-person perspective *when contextually expected*.
35
- - A first-person response typically includes pronouns like "I", "me", "my", "mine", "we", "our", or "us".
36
- - However, for **short factual responses** (e.g. "What's your name?" → "Alex") or answers that clearly imply personal ownership or involvement (e.g. "Our team led the project"), the absence of explicit first-person pronouns can still be acceptable.
37
- - The key question is: **Given the question and expected tone, is the response appropriately personal and aligned with a first-person speaking style?**
38
- Evaluate the response below.
 
 
 
 
 
 
 
 
39
  ### Question:
40
  {agent_question}
41
  ### Response:
42
  {answer}
43
- Return:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  First Person: Yes
45
  or
46
  First Person: No
47
  Reason: <short explanation>
 
 
 
 
 
48
  """
49
- fp_response = processor_llm.invoke(fp_prompt)
50
- fp_result = fp_response.content.strip().lower()
51
 
52
- if "first person: no" in fp_result:
53
- explanation = fp_result.split("reason:", 1)[-1].strip().capitalize() if "reason:" in fp_result else "The answer is not in first person."
54
- logging.warning(f"[Style Match Check] Failed first-person test: {explanation}")
55
- if return_explanation:
56
- return False, explanation
57
- return False
58
 
59
- # --- Step 3: Extract user communication profile ---
60
  style = user_profile.get_field("Communication", "Style")
61
  tone = user_profile.get_field("Communication", "Tone")
62
  length = user_profile.get_field("Communication", "Length")
63
  topics = user_profile.get_field("Communication", "Topics")
64
 
65
- # --- Step 4: Style validation prompt ---
66
  style_check_prompt = f"""
67
  You are a communication coach and writing style analyst.
68
  Evaluate how well the following response aligns with the given communication profile.
@@ -76,66 +95,41 @@ Evaluate how well the following response aligns with the given communication pro
76
  - Common Topics: {topics}
77
  ---
78
  ### Instructions:
79
- Assess how well the response aligns with the communication profile.
80
- - Allow for natural variation and expressive differences.
81
- - If the tone and structure mostly match, even if not perfect, that’s acceptable.
82
- - Only return “Style Match: No” if the response clearly *conflicts* with the profile (e.g., too formal, too short, too robotic).
83
- Respond only with one of:
84
  - Style Match: Yes
85
  - Style Match: Mostly
86
  - Style Match: No
87
  """
88
- logging.info("[Style Match Check] Invoking LLM for profile-based style check")
89
  style_response = processor_llm.invoke(style_check_prompt)
90
  style_result = style_response.content.strip().lower()
91
 
92
  if "style match: yes" in style_result or "style match: mostly" in style_result:
93
- logging.info("[Style Match Check] Match confirmed (or mostly matched)")
94
- if return_explanation:
95
- return True, None
96
- return True
97
 
98
- elif "style match: no" in style_result:
99
- if "first person: yes" in fp_result:
100
- logging.info("[Style Match Check] Potential false negative: First-person check passed but style rejected")
101
-
102
- # --- Ask LLM for explanation on mismatch ---
103
  explanation_prompt = f"""
104
- You are a communication coach and writing style analyst.
105
- The following response was evaluated as NOT matching the given communication profile.
106
- Please provide a concise reason why the style does not match.
107
  ---
108
- ### Response:
109
- {answer}
110
- ### Communication Profile:
111
- - Style: {style}
112
- - Tone: {tone}
113
- - Preferred Length: {length}
114
- - Common Topics: {topics}
115
- ---
116
- ### Please provide a short reason for style mismatch:
117
  """
118
  explanation_response = processor_llm.invoke(explanation_prompt)
119
  explanation = explanation_response.content.strip()
120
- logging.info(f"[Style Match Check] Style mismatch detected: {explanation}")
121
- if return_explanation:
122
- return False, explanation
123
- return False
124
 
125
- else:
126
- logging.warning(f"[Style Match Check] Unexpected output format: {style_result}")
127
- if return_explanation:
128
- return False, f"Unexpected output format: {style_result}"
129
- return False
130
 
131
  except Exception as e:
132
- logging.error(f"[Style Match Check] Exception occurred: {e}")
133
- if return_explanation:
134
- return False, str(e)
135
- return False
136
-
137
-
138
-
139
 
140
  def validate_response(question, answer, user_profile_str, fast_facts_str, interview_transcript_text, respondent_type, ai_evaluator_agent, processor_llm, return_explanation=False):
141
  llm_mode_prompt = f"""
@@ -168,13 +162,20 @@ You are a market research evaluator. Given the following:
168
  - Respondent Type: {respondent_type}
169
  - Question: {question}
170
  - Answer: {answer}
171
-
172
  Rate the answer on a scale of 0–10 for:
173
- 1. **Plausibility** – Is the response reasonable and believable given the user’s background?
174
- 2. **Relevance** Does the answer directly and completely address the question?
175
-
176
- Ignore writing style, grammar, tone, emotional depth, or expressiveness. Focus purely on content quality and alignment with the question.
177
-
 
 
 
 
 
 
 
 
178
  Output strictly in this format:
179
  Plausibility Rating: <0-10>
180
  Relevance Rating: <0-10>
@@ -228,12 +229,9 @@ You are a market research evaluator. Given the following:
228
  - Respondent Type: {respondent_type}
229
  - Question: {question}
230
  - Answer: {answer}
231
-
232
  Rate the answer on a scale of 0–10 for:
233
  1. **Accuracy** – Does the content align with the user’s facts or transcript, without fabrications?
234
-
235
  Ignore tone, phrasing, or style. Focus only on factual correctness.
236
-
237
  Output strictly in this format:
238
  Accuracy Rating: <0-10>
239
  If the rating is less than 8, provide a short reason below:
@@ -261,4 +259,4 @@ Accuracy Reason: <reason>
261
  return valid
262
  if return_explanation:
263
  return False, "Could not parse accuracy rating."
264
- return False
 
5
  from RespondentAgent import *
6
  from langchain_groq import ChatGroq
7
 
8
+ def matches_user_speaking_style(answer, processor_llm, user_profile, agent_question, respondent_type="INDIVIDUAL", return_explanation=False):
 
 
 
 
 
 
9
  logging.info("[Style Match Check] Entry")
10
 
11
  try:
 
17
  lower_q = agent_question.strip().lower()
18
  is_factual = any(kw in lower_q for kw in factual_keywords)
19
  if is_factual:
20
+ logging.info("[Style Match Check] Question is factual — skipping strict style enforcement")
21
  if return_explanation:
22
  return True, None
23
  return True
24
 
25
+ # --- Step 2: First-person or collective pronoun check ---
26
+ logging.info(f"[Style Match Check] Performing {'collective' if respondent_type == 'FOCUS GROUP' else 'first-person'} pronoun check")
27
+
28
+ if respondent_type == "FOCUS GROUP":
29
+ pronoun_prompt = f"""
30
+ You are an expert in writing style analysis.
31
+ Determine whether the following response is appropriate for a **focus group**, which must:
32
+ - Use collective language ("we", "our", "us", "some of us", "most participants")
33
+ - Avoid any first-person singular language ("I", "me", "my", etc.)
34
+ - Speak as a group, not as an individual
35
+ Check the response below and answer in the following format:
36
+ Focus Group Style: Yes
37
+ or
38
+ Focus Group Style: No
39
+ Reason: <short reason>
40
+ ---
41
  ### Question:
42
  {agent_question}
43
  ### Response:
44
  {answer}
45
+ """
46
+ response = processor_llm.invoke(pronoun_prompt)
47
+ result = response.content.strip().lower()
48
+
49
+ if "focus group style: no" in result:
50
+ explanation = result.split("reason:", 1)[-1].strip().capitalize() if "reason:" in result else "The response does not follow focus group voice."
51
+ logging.warning(f"[Style Match Check] Failed group tone: {explanation}")
52
+ return (False, explanation) if return_explanation else False
53
+ else:
54
+ # INDIVIDUAL — use first-person pronoun validation
55
+ fp_prompt = f"""
56
+ You are an expert in writing style analysis.
57
+ Determine whether the following response uses a personal **first-person** tone, appropriate for an individual.
58
+ - Look for use of "I", "me", "my", "mine", or implied personal ownership.
59
+ - Skip judgment on content quality or grammar — just the perspective.
60
+ Respond using this format:
61
  First Person: Yes
62
  or
63
  First Person: No
64
  Reason: <short explanation>
65
+ ---
66
+ ### Question:
67
+ {agent_question}
68
+ ### Response:
69
+ {answer}
70
  """
71
+ fp_response = processor_llm.invoke(fp_prompt)
72
+ fp_result = fp_response.content.strip().lower()
73
 
74
+ if "first person: no" in fp_result:
75
+ explanation = fp_result.split("reason:", 1)[-1].strip().capitalize() if "reason:" in fp_result else "The answer is not in first person."
76
+ logging.warning(f"[Style Match Check] Failed first-person test: {explanation}")
77
+ return (False, explanation) if return_explanation else False
 
 
78
 
79
+ # --- Step 3: Communication style match ---
80
  style = user_profile.get_field("Communication", "Style")
81
  tone = user_profile.get_field("Communication", "Tone")
82
  length = user_profile.get_field("Communication", "Length")
83
  topics = user_profile.get_field("Communication", "Topics")
84
 
 
85
  style_check_prompt = f"""
86
  You are a communication coach and writing style analyst.
87
  Evaluate how well the following response aligns with the given communication profile.
 
95
  - Common Topics: {topics}
96
  ---
97
  ### Instructions:
98
+ Assess whether the response reflects the user's typical communication style.
99
+ Respond with only one of:
 
 
 
100
  - Style Match: Yes
101
  - Style Match: Mostly
102
  - Style Match: No
103
  """
 
104
  style_response = processor_llm.invoke(style_check_prompt)
105
  style_result = style_response.content.strip().lower()
106
 
107
  if "style match: yes" in style_result or "style match: mostly" in style_result:
108
+ return (True, None) if return_explanation else True
 
 
 
109
 
110
+ if "style match: no" in style_result:
 
 
 
 
111
  explanation_prompt = f"""
112
+ You are a communication coach.
113
+ The following response was judged as **not matching** the profile. Briefly explain why.
 
114
  ---
115
+ Response: {answer}
116
+ Style: {style}
117
+ Tone: {tone}
118
+ Length: {length}
119
+ Topics: {topics}
 
 
 
 
120
  """
121
  explanation_response = processor_llm.invoke(explanation_prompt)
122
  explanation = explanation_response.content.strip()
123
+ logging.warning(f"[Style Match Check] Style mismatch explanation: {explanation}")
124
+ return (False, explanation) if return_explanation else False
 
 
125
 
126
+ # Fallback
127
+ logging.warning(f"[Style Match Check] Unclear result format: {style_result}")
128
+ return (False, f"Unexpected format: {style_result}") if return_explanation else False
 
 
129
 
130
  except Exception as e:
131
+ logging.error(f"[Style Match Check] Exception: {e}")
132
+ return (False, str(e)) if return_explanation else False
 
 
 
 
 
133
 
134
  def validate_response(question, answer, user_profile_str, fast_facts_str, interview_transcript_text, respondent_type, ai_evaluator_agent, processor_llm, return_explanation=False):
135
  llm_mode_prompt = f"""
 
162
  - Respondent Type: {respondent_type}
163
  - Question: {question}
164
  - Answer: {answer}
 
165
  Rate the answer on a scale of 0–10 for:
166
+ 1. **Plausibility** – Does the response make sense given what is known about the respondent?
167
+ - Consider the respondent’s background, demographics, stated preferences, life stage, interests, and prior responses.
168
+ - Is the answer **internally consistent** and **realistic** for someone like this respondent?
169
+ - Does it feel like something a person in their position would genuinely say or experience?
170
+ - Avoid penalising for style — focus purely on whether the answer is believable and fits the persona.
171
+ - A low plausibility score indicates the answer seems fabricated, out of character, contradictory, or implausible for this individual or group.
172
+ 2. **Relevance** – Does the answer directly and fully address the specific question asked?
173
+ - Check whether the response clearly **answers the intent of the question** without deflection or vagueness.
174
+ - Consider whether it provides a complete and meaningful response — not just a surface-level or partial reply.
175
+ - Does the answer stay **on-topic** and reflect the subject matter or framing of the original prompt?
176
+ - A low relevance score means the answer is off-topic, evasive, only loosely related, or ignores key elements of the question.
177
+ Ignore tone, emotional expression, writing style, grammar, or British/American English differences.
178
+ Focus **strictly** on the **content quality**, **truthfulness**, and **alignment with the question and user profile**.
179
  Output strictly in this format:
180
  Plausibility Rating: <0-10>
181
  Relevance Rating: <0-10>
 
229
  - Respondent Type: {respondent_type}
230
  - Question: {question}
231
  - Answer: {answer}
 
232
  Rate the answer on a scale of 0–10 for:
233
  1. **Accuracy** – Does the content align with the user’s facts or transcript, without fabrications?
 
234
  Ignore tone, phrasing, or style. Focus only on factual correctness.
 
235
  Output strictly in this format:
236
  Accuracy Rating: <0-10>
237
  If the rating is less than 8, provide a short reason below:
 
259
  return valid
260
  if return_explanation:
261
  return False, "Could not parse accuracy rating."
262
+ return False