File size: 12,157 Bytes
7204280
86a2acf
 
 
 
 
7204280
7e4b656
91980f8
4c269e9
91980f8
843922f
 
 
 
 
 
 
 
7e4b656
843922f
 
 
 
7e4b656
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
843922f
 
 
1d95a70
7e4b656
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
843922f
 
 
 
7e4b656
 
 
 
 
87e16d4
7e4b656
 
843922f
7e4b656
 
 
 
9b14f3e
7e4b656
4c269e9
 
 
 
91980f8
4c269e9
 
1bbcb42
91980f8
4c269e9
1d95a70
4c269e9
91980f8
 
 
4c269e9
91980f8
 
7e4b656
 
1bbcb42
 
 
91980f8
4c269e9
 
95b929d
1bbcb42
7e4b656
1bbcb42
7e4b656
08871c2
7e4b656
 
08871c2
7e4b656
 
 
 
 
08871c2
 
 
7e4b656
 
1bbcb42
7e4b656
 
 
c90a492
95b929d
7e4b656
 
08871c2
5e03f32
7204280
91980f8
 
 
 
 
 
 
 
7204280
 
 
 
 
95b929d
7204280
 
 
 
 
e56d353
7204280
e56d353
736c2ad
91980f8
 
 
 
 
 
736c2ad
7e4b656
 
 
 
 
 
 
 
 
 
 
 
 
91980f8
 
 
5e03f32
 
 
91980f8
e56d353
 
 
 
5e03f32
 
e56d353
 
 
95b929d
fc0c76b
 
e56d353
 
95b929d
fc0c76b
 
5e03f32
 
 
 
e56d353
7204280
5e03f32
 
 
 
 
 
 
 
 
 
 
7204280
91980f8
7204280
fc0c76b
e56d353
736c2ad
91980f8
 
 
 
 
 
736c2ad
 
 
91980f8
 
5e03f32
 
91980f8
e56d353
 
 
5e03f32
e56d353
 
 
95b929d
fc0c76b
 
5e03f32
 
e56d353
7204280
5e03f32
 
 
 
 
 
 
 
7e4b656
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
import logging
import gradio as gr
import re

from RespondentAgent import *
from langchain_groq import ChatGroq

def _is_factual_question(agent_question):
    """Return True when the question asks for simple factual/demographic info.

    Factual questions (name, age, location, occupation, ...) skip the strict
    style checks entirely — a short factual reply legitimately won't reflect
    the respondent's usual tone or length.
    """
    factual_keywords = [
        "name", "age", "where are you from", "where do you live", "occupation",
        "birthplace", "what do you do", "how old", "which city", "which country"
    ]
    lower_q = agent_question.strip().lower()
    return any(kw in lower_q for kw in factual_keywords)


def _check_pronoun_style(answer, processor_llm, agent_question, respondent_type):
    """Ask the LLM whether the answer uses the right voice for the respondent.

    FOCUS GROUP respondents must use collective language ("we", "our", ...);
    INDIVIDUAL respondents must use first-person singular ("I", "my", ...).

    Returns:
        (passed, explanation): explanation is None when passed is True.
    """
    if respondent_type == "FOCUS GROUP":
        prompt = f"""
You are an expert in writing style analysis.
Determine whether the following response is appropriate for a **focus group**, which must:
- Use collective language ("we", "our", "us", "some of us", "most participants")
- Avoid any first-person singular language ("I", "me", "my", etc.)
- Speak as a group, not as an individual
Check the response below and answer in the following format:
Focus Group Style: Yes
or
Focus Group Style: No
Reason: <short reason>
---
### Question:
{agent_question}
### Response:
{answer}
"""
        fail_marker = "focus group style: no"
        default_reason = "The response does not follow focus group voice."
        fail_label = "Failed group tone"
    else:
        # INDIVIDUAL — first-person pronoun validation.
        prompt = f"""
You are an expert in writing style analysis.
Determine whether the following response uses a personal **first-person** tone, appropriate for an individual.
- Look for use of "I", "me", "my", "mine", or implied personal ownership.
- Skip judgment on content quality or grammar — just the perspective.
Respond using this format:
First Person: Yes
or
First Person: No
Reason: <short explanation>
---
### Question:
{agent_question}
### Response:
{answer}
"""
        fail_marker = "first person: no"
        default_reason = "The answer is not in first person."
        fail_label = "Failed first-person test"

    result = processor_llm.invoke(prompt).content.strip().lower()
    if fail_marker in result:
        # Pull the model-supplied reason when present; fall back to a canned one.
        explanation = (result.split("reason:", 1)[-1].strip().capitalize()
                       if "reason:" in result else default_reason)
        logging.warning(f"[Style Match Check] {fail_label}: {explanation}")
        return False, explanation
    return True, None


def _check_communication_style(answer, processor_llm, user_profile):
    """Ask the LLM whether the answer matches the stored communication profile.

    Reads Style/Tone/Length/Topics from the profile's "Communication" section.
    A "Yes" or "Mostly" verdict passes; a "No" verdict triggers a second LLM
    call to obtain a short explanation.

    Returns:
        (passed, explanation): explanation is None on pass, a reason string on
        failure or when the LLM reply doesn't match the expected format.
    """
    style = user_profile.get_field("Communication", "Style")
    tone = user_profile.get_field("Communication", "Tone")
    length = user_profile.get_field("Communication", "Length")
    topics = user_profile.get_field("Communication", "Topics")

    style_check_prompt = f"""
You are a communication coach and writing style analyst.
Evaluate how well the following response aligns with the given communication profile.
---
### Response:
{answer}
### Communication Profile:
- Style: {style}
- Tone: {tone}
- Preferred Length: {length}
- Common Topics: {topics}
---
### Instructions:
Assess whether the response reflects the user's typical communication style.
Respond with only one of:
- Style Match: Yes
- Style Match: Mostly
- Style Match: No
"""
    style_result = processor_llm.invoke(style_check_prompt).content.strip().lower()

    if "style match: yes" in style_result or "style match: mostly" in style_result:
        return True, None

    if "style match: no" in style_result:
        # Second round-trip: ask the model *why* the style didn't match.
        explanation_prompt = f"""
You are a communication coach.
The following response was judged as **not matching** the profile. Briefly explain why.
---
Response: {answer}
Style: {style}
Tone: {tone}
Length: {length}
Topics: {topics}
"""
        explanation = processor_llm.invoke(explanation_prompt).content.strip()
        logging.warning(f"[Style Match Check] Style mismatch explanation: {explanation}")
        return False, explanation

    # Fallback: the LLM reply matched none of the expected verdict strings.
    logging.warning(f"[Style Match Check] Unclear result format: {style_result}")
    return False, f"Unexpected format: {style_result}"


def matches_user_speaking_style(answer, processor_llm, user_profile, agent_question, respondent_type="INDIVIDUAL", return_explanation=False):
    """Validate that `answer` matches the respondent's expected speaking style.

    Runs up to three checks, short-circuiting on the first failure:
      1. Factual-question shortcut — factual questions pass unconditionally.
      2. Pronoun/voice check — collective voice for FOCUS GROUP respondents,
         first-person voice for INDIVIDUAL respondents (LLM-judged).
      3. Communication-profile match — style/tone/length/topics (LLM-judged).

    Args:
        answer: The respondent's generated answer text.
        processor_llm: LLM client exposing `.invoke(prompt)` returning an
            object with a `.content` string (e.g. a LangChain chat model).
        user_profile: Profile object exposing `.get_field(section, key)`.
        agent_question: The interviewer question the answer responds to.
        respondent_type: "INDIVIDUAL" (default) or "FOCUS GROUP".
        return_explanation: When True, return a `(passed, explanation)` tuple
            instead of a bare bool; explanation is None on pass.

    Returns:
        bool, or (bool, str | None) when return_explanation is True. Any
        exception is caught, logged, and reported as a failure.
    """
    logging.info("[Style Match Check] Entry")

    try:
        # --- Step 1: Skip style check for factual questions ---
        if _is_factual_question(agent_question):
            logging.info("[Style Match Check] Question is factual — skipping strict style enforcement")
            return (True, None) if return_explanation else True

        # --- Step 2: First-person or collective pronoun check ---
        logging.info(f"[Style Match Check] Performing {'collective' if respondent_type == 'FOCUS GROUP' else 'first-person'} pronoun check")
        passed, explanation = _check_pronoun_style(answer, processor_llm, agent_question, respondent_type)
        if not passed:
            return (False, explanation) if return_explanation else False

        # --- Step 3: Communication style match ---
        passed, explanation = _check_communication_style(answer, processor_llm, user_profile)
        return (passed, explanation) if return_explanation else passed

    except Exception as e:
        logging.error(f"[Style Match Check] Exception: {e}")
        return (False, str(e)) if return_explanation else False

def _extract_field(eval_text, label):
    """Return the text after the first line starting with `label` (case-insensitive).

    Returns None when no line matches. `label` should include the trailing
    colon, e.g. "accuracy reason:".
    """
    prefix = label.lower()
    for line in eval_text.split("\n"):
        if line.lower().startswith(prefix):
            return line.split(":", 1)[1].strip()
    return None


def _extract_rating(eval_text, label):
    """Return the float rating following `label`, or None if absent/unparseable."""
    raw = _extract_field(eval_text, label)
    if raw is None:
        return None
    try:
        return float(raw)
    except Exception as e:
        logging.error(f"Error parsing {label.rstrip(':').lower()}: {e}")
        return None


def _evaluate_exploratory(question, answer, user_profile_str, fast_facts_str,
                          interview_transcript_text, respondent_type,
                          processor_llm, return_explanation):
    """Score an exploratory (subjective) answer on plausibility and relevance.

    Both ratings must be >= 8.0 to pass. Returns a bool, or
    (bool, explanation | None) when return_explanation is True.
    """
    eval_prompt = f"""
You are a market research evaluator. Given the following:
- User Profile: {user_profile_str}
- Fast Facts: {fast_facts_str}
- Interview Transcript: {interview_transcript_text}
- Respondent Type: {respondent_type}
- Question: {question}
- Answer: {answer}
Rate the answer on a scale of 0–10 for:
1. **Plausibility** – Does the response make sense given what is known about the respondent?
   - Consider the respondent’s background, demographics, stated preferences, life stage, interests, and prior responses.
   - Is the answer **internally consistent** and **realistic** for someone like this respondent?
   - Does it feel like something a person in their position would genuinely say or experience?
   - Avoid penalising for style — focus purely on whether the answer is believable and fits the persona.
   - A low plausibility score indicates the answer seems fabricated, out of character, contradictory, or implausible for this individual or group.
2. **Relevance** – Does the answer directly and fully address the specific question asked?
   - Check whether the response clearly **answers the intent of the question** without deflection or vagueness.
   - Consider whether it provides a complete and meaningful response — not just a surface-level or partial reply.
   - Does the answer stay **on-topic** and reflect the subject matter or framing of the original prompt?
   - A low relevance score means the answer is off-topic, evasive, only loosely related, or ignores key elements of the question.
Ignore tone, emotional expression, writing style, grammar, or British/American English differences.
Focus **strictly** on the **content quality**, **truthfulness**, and **alignment with the question and user profile**.
Output strictly in this format:
Plausibility Rating: <0-10>
Relevance Rating: <0-10>
If either rating is less than 8, provide a short reason for each below:
Plausibility Reason: <reason>
Relevance Reason: <reason>
"""
    eval_text = processor_llm.invoke(eval_prompt).content.strip()
    plausibility = _extract_rating(eval_text, "plausibility rating:")
    relevance = _extract_rating(eval_text, "relevance rating:")
    plaus_reason = _extract_field(eval_text, "plausibility reason:")
    relev_reason = _extract_field(eval_text, "relevance reason:")
    logging.info(f"Exploratory evaluation: plausibility={plausibility}, relevance={relevance}")

    if plausibility is None or relevance is None:
        # Could not recover both numeric ratings from the LLM reply.
        if return_explanation:
            return False, "Could not parse plausibility/relevance ratings."
        return False

    valid = plausibility >= 8.0 and relevance >= 8.0
    if not return_explanation:
        return valid
    # Collect per-dimension reasons only for the failing dimensions.
    feedback = []
    if plausibility < 8.0 and plaus_reason:
        feedback.append(f"Plausibility: {plaus_reason}")
    if relevance < 8.0 and relev_reason:
        feedback.append(f"Relevance: {relev_reason}")
    return valid, "; ".join(feedback) if feedback else None


def _evaluate_factbased(question, answer, user_profile_str, fast_facts_str,
                        interview_transcript_text, respondent_type,
                        processor_llm, return_explanation):
    """Score a fact-based answer on accuracy against profile/transcript.

    The accuracy rating must be >= 8.0 to pass. Returns a bool, or
    (bool, explanation | None) when return_explanation is True.
    """
    logging.info("Performing fact-based evaluation (accuracy)...")
    eval_prompt = f"""
You are a market research evaluator. Given the following:
- User Profile: {user_profile_str}
- Fast Facts: {fast_facts_str}
- Interview Transcript: {interview_transcript_text}
- Respondent Type: {respondent_type}
- Question: {question}
- Answer: {answer}
Rate the answer on a scale of 0–10 for:
1. **Accuracy** – Does the content align with the user’s facts or transcript, without fabrications?
Ignore tone, phrasing, or style. Focus only on factual correctness.
Output strictly in this format:
Accuracy Rating: <0-10>
If the rating is less than 8, provide a short reason below:
Accuracy Reason: <reason>
"""
    eval_text = processor_llm.invoke(eval_prompt).content.strip()
    accuracy = _extract_rating(eval_text, "accuracy rating:")
    accuracy_reason = _extract_field(eval_text, "accuracy reason:")
    logging.info(f"Fact-based evaluation: accuracy={accuracy}")

    if accuracy is None:
        if return_explanation:
            return False, "Could not parse accuracy rating."
        return False

    valid = accuracy >= 8.0
    if return_explanation:
        if not valid and accuracy_reason:
            return False, accuracy_reason
        return valid, None
    return valid


def validate_response(question, answer, user_profile_str, fast_facts_str, interview_transcript_text, respondent_type, ai_evaluator_agent, processor_llm, return_explanation=False):
    """Validate an answer's content quality using LLM-as-judge evaluation.

    First classifies the question as exploratory (opinion/reflective) or
    fact-based (verifiable against profile/transcript), then scores the
    answer accordingly: plausibility + relevance for exploratory, accuracy
    for fact-based. Every rating must be >= 8.0 to pass.

    Args:
        question: The interviewer question.
        answer: The respondent's answer under evaluation.
        user_profile_str: Serialized respondent profile given to the judge.
        fast_facts_str: Serialized quick facts about the respondent.
        interview_transcript_text: Prior transcript for context.
        respondent_type: e.g. "INDIVIDUAL" or "FOCUS GROUP".
        ai_evaluator_agent: Unused; retained for caller compatibility.
        processor_llm: LLM client exposing `.invoke(prompt)` returning an
            object with a `.content` string.
        return_explanation: When True, return (valid, explanation | None)
            instead of a bare bool.

    Returns:
        bool, or (bool, str | None) when return_explanation is True.
    """
    llm_mode_prompt = f"""
You are an expert in market research interview analysis. Given the following question, determine if it is:
- Exploratory: subjective, open-ended, opinion-based, or reflective (e.g., feelings, motivations, preferences, aspirations, values, beliefs, etc.)
- Fact-based: objective, factual, or directly verifiable from the respondent's profile or transcript (e.g., age, location, occupation, education, etc.)
Respondent Type: {respondent_type}
Question: {question}
Output strictly in this format:
Evaluation Mode: <Exploratory or Fact-based>
"""
    output = processor_llm.invoke(llm_mode_prompt).content.strip()
    mode_value = _extract_field(output, "evaluation mode:")
    # Default to exploratory unless the judge explicitly says "fact".
    evaluation_mode = "factbased" if mode_value is not None and "fact" in mode_value.lower() else "exploratory"
    logging.info(f"LLM determined evaluation mode: {evaluation_mode}")

    if evaluation_mode == "exploratory":
        return _evaluate_exploratory(question, answer, user_profile_str, fast_facts_str,
                                     interview_transcript_text, respondent_type,
                                     processor_llm, return_explanation)
    return _evaluate_factbased(question, answer, user_profile_str, fast_facts_str,
                               interview_transcript_text, respondent_type,
                               processor_llm, return_explanation)