Basitha committed on
Commit
91980f8
·
verified ·
1 Parent(s): 9242eb0

Update common/ResponseValidation.py

Browse files
Files changed (1) hide show
  1. common/ResponseValidation.py +112 -105
common/ResponseValidation.py CHANGED
@@ -6,43 +6,34 @@ from RespondentAgent import *
6
  from langchain_groq import ChatGroq
7
 
8
 
9
- def is_first_person(answer, processor_llm):
10
- prompt = f"""
11
- You are an expert in analyzing writing style and narrative perspective.
12
- Determine whether the following response is written from a first-person point of view.
13
- A first-person response includes pronouns such as "I", "me", "my", "mine", "we", "our", or "us" and is written from the perspective of the speaker.
14
- Do not guess. Only say "Yes" if the writing is clearly in first person. Otherwise, say "No".
 
 
 
 
 
 
 
 
 
15
  Response:
16
  \"\"\"{answer}\"\"\"
17
- Output strictly in the following format:
18
  First Person: Yes
19
  or
20
  First Person: No
21
  """
22
- try:
23
- response = processor_llm.invoke(prompt)
24
- content = response.content.strip().lower()
25
- if "first person: yes" in content:
26
- return True
27
- elif "first person: no" in content:
28
  return False
29
- else:
30
- logging.warning(f"Unexpected output format from LLM for first person check: {content}")
31
- return False
32
- except Exception as e:
33
- logging.error(f"LLM failed during first person check: {e}")
34
- return False
35
-
36
 
37
- def matches_user_speaking_style(answer, transcript_text, processor_llm, user_profile, agent_question):
38
- """
39
- Uses the LLM to determine if the answer matches the tone and style of the user's prior speaking style in the transcript.
40
- Returns True if similar, False otherwise.
41
- Incorporates logic to skip style matching for factual questions and uses profile-based criteria.
42
- """
43
- logging.info("[Style Match Check] Entry")
44
-
45
- try:
46
  # Get communication profile
47
  style = user_profile.get_field("Communication", "Style")
48
  tone = user_profile.get_field("Communication", "Tone")
@@ -61,38 +52,39 @@ def matches_user_speaking_style(answer, transcript_text, processor_llm, user_pro
61
  logging.info("[Style Match Check] Question is factual — skipping style comparison")
62
  return True
63
 
 
64
  prompt = f"""
65
- You are a writing style and tone analyst.
66
-
67
- Your job is to assess whether a new response sounds like it was written by the same person who spoke in the interview transcript — considering phrasing, vocabulary, tone, and sentence structure.
68
-
69
- ---
70
- ### Prior Interview Transcript (how the user usually talks):
71
- \"\"\"{transcript_text}\"\"\"
72
-
73
- ---
74
- ### New Response:
75
- \"\"\"{answer}\"\"\"
76
-
77
- ---
78
- ### Style Profile Reference:
79
- - Style: {style}
80
- - Tone: {tone}
81
- - Preferred Length: {length}
82
- - Topics: {topics}
83
-
84
- ---
85
- ### Instructions:
86
- - Check if the *tone*, *style*, and *language* of the new response align with the transcript.
87
- - Use the style profile for reference.
88
- - Focus on phrasing, formality, sentence structure, expressiveness, and personal flair.
89
- - Ignore topic similarity — you’re assessing delivery style.
90
- - Reply only with one of the following:
91
-
92
- Style Match: Yes
93
- or
94
- Style Match: No
95
- """
96
 
97
  logging.info("[Style Match Check] Invoking LLM with style comparison prompt")
98
  response = processor_llm.invoke(prompt)
@@ -105,23 +97,24 @@ def matches_user_speaking_style(answer, transcript_text, processor_llm, user_pro
105
  logging.info("[Style Match Check] Style mismatch detected")
106
  return False
107
  else:
108
- logging.warning(f"[Style Match Check] Unexpected response format: {result}")
109
  return False
110
 
111
  except Exception as e:
112
  logging.error(f"[Style Match Check] LLM failed during comparison: {e}")
113
  return False
114
 
 
115
  def validate_response(question, answer, user_profile_str, fast_facts_str, interview_transcript_text, respondent_type, ai_evaluator_agent, processor_llm):
116
  llm_mode_prompt = f"""
117
- You are an expert in market research interview analysis. Given the following question, determine if it is:
118
- - Exploratory: subjective, open-ended, opinion-based, or reflective (e.g., feelings, motivations, preferences, aspirations, values, beliefs, etc.)
119
- - Fact-based: objective, factual, or directly verifiable from the respondent's profile or transcript (e.g., age, location, occupation, education, etc.)
120
- Respondent Type: {respondent_type}
121
- Question: {question}
122
- Output strictly in this format:
123
- Evaluation Mode: <Exploratory or Fact-based>
124
- """
125
  response = processor_llm.invoke(llm_mode_prompt)
126
  output = response.content.strip()
127
  evaluation_mode = "exploratory"
@@ -136,20 +129,20 @@ def validate_response(question, answer, user_profile_str, fast_facts_str, interv
136
 
137
  if evaluation_mode == "exploratory":
138
  eval_prompt = f"""
139
- You are an expert market research evaluator. Given the following:
140
- - User Profile: {user_profile_str}
141
- - Fast Facts: {fast_facts_str}
142
- - Interview Transcript: {interview_transcript_text}
143
- - Respondent Type: {respondent_type}
144
- - Question: {question}
145
- - Answer: {answer}
146
- Please rate the answer on a scale of 0-10 for:
147
- 1. Plausibility (how realistic, authentic, and in-character the response is, given the profile and context)
148
- 2. Relevance (how directly and completely the answer addresses the question)
149
- Output strictly in this format:
150
- Plausibility Rating: <0-10>
151
- Relevance Rating: <0-10>
152
- """
153
  eval_response = processor_llm.invoke(eval_prompt)
154
  eval_text = eval_response.content.strip()
155
  plausibility = None
@@ -167,27 +160,24 @@ def validate_response(question, answer, user_profile_str, fast_facts_str, interv
167
  logging.error(f"Error parsing relevance rating: {e}")
168
  logging.info(f"Exploratory evaluation: plausibility={plausibility}, relevance={relevance}")
169
  if plausibility is not None and relevance is not None:
170
- if plausibility >= 8.0 and relevance >= 8.0:
171
- if not is_first_person(answer, processor_llm):
172
- logging.warning("Did not pass style due to 3rd person use")
173
- return False
174
- return True
175
  return False
 
176
  else:
177
  logging.info("Performing fact-based evaluation (accuracy)...")
178
  eval_prompt = f"""
179
- You are an expert market research evaluator. Given the following:
180
- - User Profile: {user_profile_str}
181
- - Fast Facts: {fast_facts_str}
182
- - Interview Transcript: {interview_transcript_text}
183
- - Respondent Type: {respondent_type}
184
- - Question: {question}
185
- - Answer: {answer}
186
- Please rate the answer on a scale of 0-10 for:
187
- 1. Accuracy (how well the answer matches the facts in the profile, transcript, or fast facts; penalise any unsupported or fabricated content)
188
- Output strictly in this format:
189
- Accuracy Rating: <0-10>
190
- """
191
  eval_response = processor_llm.invoke(eval_prompt)
192
  eval_text = eval_response.content.strip()
193
  accuracy = None
@@ -199,9 +189,26 @@ def validate_response(question, answer, user_profile_str, fast_facts_str, interv
199
  logging.error(f"Error parsing accuracy rating: {e}")
200
  logging.info(f"Fact-based evaluation: accuracy={accuracy}")
201
  if accuracy is not None:
202
- if accuracy >= 8.0:
203
- if not is_first_person(answer, processor_llm):
204
- logging.warning("Did not pass style due to 3rd person use")
205
- return False
206
- return True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
207
  return False
 
6
  from langchain_groq import ChatGroq
7
 
8
 
9
+ def matches_user_speaking_style(answer, transcript_text, processor_llm, user_profile, agent_question):
10
+ """
11
+ Uses the LLM to determine if the answer matches the tone and style of the user's prior speaking style in the transcript.
12
+ Returns True if similar and in first person, False otherwise.
13
+ """
14
+ logging.info("[Style Match Check] Entry")
15
+
16
+ try:
17
+ # First-person perspective check
18
+ fp_prompt = f"""
19
+ You are an expert in analysing writing style and narrative perspective.
20
+ Determine whether the following response is written from a first-person point of view.
21
+ A first-person response includes pronouns like "I", "me", "my", "mine", "we", "our", or "us".
22
+ Say "First Person: Yes" only if clearly first-person, else say "First Person: No".
23
+
24
  Response:
25
  \"\"\"{answer}\"\"\"
26
+ Output format:
27
  First Person: Yes
28
  or
29
  First Person: No
30
  """
31
+ fp_response = processor_llm.invoke(fp_prompt)
32
+ fp_result = fp_response.content.strip().lower()
33
+ if "first person: no" in fp_result:
34
+ logging.warning("[Style Match Check] Failed first-person test")
 
 
35
  return False
 
 
 
 
 
 
 
36
 
 
 
 
 
 
 
 
 
 
37
  # Get communication profile
38
  style = user_profile.get_field("Communication", "Style")
39
  tone = user_profile.get_field("Communication", "Tone")
 
52
  logging.info("[Style Match Check] Question is factual — skipping style comparison")
53
  return True
54
 
55
+ # Style match prompt
56
  prompt = f"""
57
+ You are a writing style and tone analyst.
58
+
59
+ Your job is to assess whether a new response sounds like it was written by the same person who spoke in the interview transcript — considering phrasing, vocabulary, tone, and sentence structure.
60
+
61
+ ---
62
+ ### Prior Interview Transcript (how the user usually talks):
63
+ \"\"\"{transcript_text}\"\"\"
64
+
65
+ ---
66
+ ### New Response:
67
+ \"\"\"{answer}\"\"\"
68
+
69
+ ---
70
+ ### Style Profile Reference:
71
+ - Style: {style}
72
+ - Tone: {tone}
73
+ - Preferred Length: {length}
74
+ - Topics: {topics}
75
+
76
+ ---
77
+ ### Instructions:
78
+ - Check if the *tone*, *style*, and *language* of the new response align with the transcript.
79
+ - Use the style profile for reference.
80
+ - Focus on phrasing, formality, sentence structure, expressiveness, and personal flair.
81
+ - Ignore topic similarity — you’re assessing delivery style.
82
+ - Reply only with one of the following:
83
+
84
+ Style Match: Yes
85
+ or
86
+ Style Match: No
87
+ """
88
 
89
  logging.info("[Style Match Check] Invoking LLM with style comparison prompt")
90
  response = processor_llm.invoke(prompt)
 
97
  logging.info("[Style Match Check] Style mismatch detected")
98
  return False
99
  else:
100
+ logging.warning(f"[Style Match Check] Unexpected output format: {result}")
101
  return False
102
 
103
  except Exception as e:
104
  logging.error(f"[Style Match Check] LLM failed during comparison: {e}")
105
  return False
106
 
107
+
108
  def validate_response(question, answer, user_profile_str, fast_facts_str, interview_transcript_text, respondent_type, ai_evaluator_agent, processor_llm):
109
  llm_mode_prompt = f"""
110
+ You are an expert in market research interview analysis. Given the following question, determine if it is:
111
+ - Exploratory: subjective, open-ended, opinion-based, or reflective (e.g., feelings, motivations, preferences, aspirations, values, beliefs, etc.)
112
+ - Fact-based: objective, factual, or directly verifiable from the respondent's profile or transcript (e.g., age, location, occupation, education, etc.)
113
+ Respondent Type: {respondent_type}
114
+ Question: {question}
115
+ Output strictly in this format:
116
+ Evaluation Mode: <Exploratory or Fact-based>
117
+ """
118
  response = processor_llm.invoke(llm_mode_prompt)
119
  output = response.content.strip()
120
  evaluation_mode = "exploratory"
 
129
 
130
  if evaluation_mode == "exploratory":
131
  eval_prompt = f"""
132
+ You are an expert market research evaluator. Given the following:
133
+ - User Profile: {user_profile_str}
134
+ - Fast Facts: {fast_facts_str}
135
+ - Interview Transcript: {interview_transcript_text}
136
+ - Respondent Type: {respondent_type}
137
+ - Question: {question}
138
+ - Answer: {answer}
139
+ Please rate the answer on a scale of 0-10 for:
140
+ 1. Plausibility (how realistic, authentic, and in-character the response is, given the profile and context)
141
+ 2. Relevance (how directly and completely the answer addresses the question)
142
+ Output strictly in this format:
143
+ Plausibility Rating: <0-10>
144
+ Relevance Rating: <0-10>
145
+ """
146
  eval_response = processor_llm.invoke(eval_prompt)
147
  eval_text = eval_response.content.strip()
148
  plausibility = None
 
160
  logging.error(f"Error parsing relevance rating: {e}")
161
  logging.info(f"Exploratory evaluation: plausibility={plausibility}, relevance={relevance}")
162
  if plausibility is not None and relevance is not None:
163
+ return plausibility >= 8.0 and relevance >= 8.0
 
 
 
 
164
  return False
165
+
166
  else:
167
  logging.info("Performing fact-based evaluation (accuracy)...")
168
  eval_prompt = f"""
169
+ You are an expert market research evaluator. Given the following:
170
+ - User Profile: {user_profile_str}
171
+ - Fast Facts: {fast_facts_str}
172
+ - Interview Transcript: {interview_transcript_text}
173
+ - Respondent Type: {respondent_type}
174
+ - Question: {question}
175
+ - Answer: {answer}
176
+ Please rate the answer on a scale of 0-10 for:
177
+ 1. Accuracy (how well the answer matches the facts in the profile, transcript, or fast facts; penalise any unsupported or fabricated content)
178
+ Output strictly in this format:
179
+ Accuracy Rating: <0-10>
180
+ """
181
  eval_response = processor_llm.invoke(eval_prompt)
182
  eval_text = eval_response.content.strip()
183
  accuracy = None
 
189
  logging.error(f"Error parsing accuracy rating: {e}")
190
  logging.info(f"Fact-based evaluation: accuracy={accuracy}")
191
  if accuracy is not None:
192
+ return accuracy >= 8.0
193
+ return False
194
+
195
+
196
+ def validate_styled_answer(agent_name, agent_question, styled_answer, user_profile, processor_llm, interview_transcript_text):
197
+ """
198
+ Validates whether the styled answer matches the user's typical speaking style using prior interview transcript and communication profile.
199
+ Returns True if stylistically aligned, False otherwise.
200
+ """
201
+ logging.info("[validate_styled_answer] Entry")
202
+ try:
203
+ is_valid = matches_user_speaking_style(
204
+ answer=styled_answer,
205
+ transcript_text=interview_transcript_text,
206
+ processor_llm=processor_llm,
207
+ user_profile=user_profile,
208
+ agent_question=agent_question
209
+ )
210
+ logging.info(f"[validate_styled_answer] Style validation result: {is_valid}")
211
+ return is_valid
212
+ except Exception as e:
213
+ logging.exception("[validate_styled_answer] Exception during style validation")
214
  return False