import logging
import re

import gradio as gr
from langchain_groq import ChatGroq

from RespondentAgent import *  # NOTE(review): wildcard import — presumably supplies names used elsewhere in the app; verify

# Questions containing any of these phrases are treated as factual and skip
# the strict style-enforcement pipeline entirely.
_FACTUAL_KEYWORDS = [
    "name", "age", "where are you from", "where do you live", "occupation",
    "birthplace", "what do you do", "how old", "which city", "which country",
]

# First (possibly signed / fractional) number in a string, so LLM outputs such
# as "8", "9.5", or "9 (solid answer)" all parse instead of raising ValueError.
_NUMBER_RE = re.compile(r"-?\d+(?:\.\d+)?")


def _parse_rating(line):
    """Return the first number found after the ':' in a 'Label: <n>' line, or None."""
    _, _, tail = line.partition(":")
    match = _NUMBER_RE.search(tail)
    return float(match.group()) if match else None


def matches_user_speaking_style(answer, processor_llm, user_profile, agent_question,
                                respondent_type="INDIVIDUAL", return_explanation=False):
    """Check whether ``answer`` matches the respondent's expected speaking style.

    Runs up to three LLM-backed checks:
      1. Factual questions (name, age, location, ...) skip style enforcement.
      2. Perspective check: collective voice ("we"/"our") for "FOCUS GROUP",
         first-person voice ("I"/"my") otherwise.
      3. Alignment with the user's Communication profile
         (Style / Tone / Length / Topics).

    Args:
        answer: Candidate response text to evaluate.
        processor_llm: Object with an ``invoke(prompt)`` method returning a
            message exposing a ``.content`` string (LangChain-style chat model).
        user_profile: Profile object exposing ``get_field(section, key)``.
        agent_question: The interviewer question that ``answer`` responds to.
        respondent_type: "INDIVIDUAL" (default) or "FOCUS GROUP".
        return_explanation: When True, return ``(bool, explanation_or_None)``
            instead of a bare bool.

    Returns:
        bool, or ``(bool, str | None)`` when ``return_explanation`` is True.
        Any exception is caught and reported as a failed check.
    """
    logging.info("[Style Match Check] Entry")
    try:
        # --- Step 1: skip strict style enforcement for factual questions ---
        lower_q = agent_question.strip().lower()
        if any(kw in lower_q for kw in _FACTUAL_KEYWORDS):
            logging.info("[Style Match Check] Question is factual — skipping strict style enforcement")
            return (True, None) if return_explanation else True

        # --- Step 2: first-person or collective pronoun check ---
        logging.info(
            "[Style Match Check] Performing %s pronoun check",
            "collective" if respondent_type == "FOCUS GROUP" else "first-person",
        )
        if respondent_type == "FOCUS GROUP":
            pronoun_prompt = f"""
You are an expert in writing style analysis.

Determine whether the following response is appropriate for a **focus group**, which must:
- Use collective language ("we", "our", "us", "some of us", "most participants")
- Avoid any first-person singular language ("I", "me", "my", etc.)
- Speak as a group, not as an individual

Check the response below and answer in the following format:
Focus Group Style: Yes
or
Focus Group Style: No
Reason:

---

### Question:
{agent_question}

### Response:
{answer}
"""
            result = processor_llm.invoke(pronoun_prompt).content.strip().lower()
            if "focus group style: no" in result:
                # Surface the model's own reason when it provided one.
                if "reason:" in result:
                    explanation = result.split("reason:", 1)[-1].strip().capitalize()
                else:
                    explanation = "The response does not follow focus group voice."
                logging.warning("[Style Match Check] Failed group tone: %s", explanation)
                return (False, explanation) if return_explanation else False
        else:
            # INDIVIDUAL — validate first-person perspective.
            fp_prompt = f"""
You are an expert in writing style analysis.

Determine whether the following response uses a personal **first-person** tone, appropriate for an individual.
- Look for use of "I", "me", "my", "mine", or implied personal ownership.
- Skip judgment on content quality or grammar — just the perspective.

Respond using this format:
First Person: Yes
or
First Person: No
Reason:

---

### Question:
{agent_question}

### Response:
{answer}
"""
            fp_result = processor_llm.invoke(fp_prompt).content.strip().lower()
            if "first person: no" in fp_result:
                if "reason:" in fp_result:
                    explanation = fp_result.split("reason:", 1)[-1].strip().capitalize()
                else:
                    explanation = "The answer is not in first person."
                logging.warning("[Style Match Check] Failed first-person test: %s", explanation)
                return (False, explanation) if return_explanation else False

        # --- Step 3: communication style match against the stored profile ---
        style = user_profile.get_field("Communication", "Style")
        tone = user_profile.get_field("Communication", "Tone")
        length = user_profile.get_field("Communication", "Length")
        topics = user_profile.get_field("Communication", "Topics")

        style_check_prompt = f"""
You are a communication coach and writing style analyst.

Evaluate how well the following response aligns with the given communication profile.

---

### Response:
{answer}

### Communication Profile:
- Style: {style}
- Tone: {tone}
- Preferred Length: {length}
- Common Topics: {topics}

---

### Instructions:
Assess whether the response reflects the user's typical communication style.
Respond with only one of:
- Style Match: Yes
- Style Match: Mostly
- Style Match: No
"""
        style_result = processor_llm.invoke(style_check_prompt).content.strip().lower()

        if "style match: yes" in style_result or "style match: mostly" in style_result:
            return (True, None) if return_explanation else True

        if "style match: no" in style_result:
            # Ask the model for a short mismatch explanation for logs/caller.
            explanation_prompt = f"""
You are a communication coach. The following response was judged as **not matching** the profile. Briefly explain why.

---

Response:
{answer}

Style: {style}
Tone: {tone}
Length: {length}
Topics: {topics}
"""
            explanation = processor_llm.invoke(explanation_prompt).content.strip()
            logging.warning("[Style Match Check] Style mismatch explanation: %s", explanation)
            return (False, explanation) if return_explanation else False

        # Fallback: an unparseable verdict counts as a failure.
        logging.warning("[Style Match Check] Unclear result format: %s", style_result)
        return (False, f"Unexpected format: {style_result}") if return_explanation else False

    except Exception as e:
        # Defensive boundary: any LLM/profile error is reported as a failed
        # check rather than crashing the interview loop.
        logging.error("[Style Match Check] Exception: %s", e)
        return (False, str(e)) if return_explanation else False


def _determine_evaluation_mode(question, respondent_type, processor_llm):
    """Ask the LLM whether ``question`` is exploratory or fact-based.

    Returns "factbased" or "exploratory" (the default when the verdict line
    is missing or unparseable).
    """
    llm_mode_prompt = f"""
You are an expert in market research interview analysis. Given the following question, determine if it is:
- Exploratory: subjective, open-ended, opinion-based, or reflective (e.g., feelings, motivations, preferences, aspirations, values, beliefs, etc.)
- Fact-based: objective, factual, or directly verifiable from the respondent's profile or transcript (e.g., age, location, occupation, education, etc.)

Respondent Type: {respondent_type}
Question: {question}

Output strictly in this format:
Evaluation Mode:
"""
    output = processor_llm.invoke(llm_mode_prompt).content.strip()
    for line in output.split("\n"):
        if line.lower().startswith("evaluation mode:"):
            val = line.split(":", 1)[1].strip().lower()
            # Take the first explicit verdict; don't let later stray lines
            # overwrite it (the original kept scanning and kept the last).
            return "factbased" if "fact" in val else "exploratory"
    return "exploratory"


def _evaluate_exploratory(question, answer, user_profile_str, fast_facts_str,
                          interview_transcript_text, respondent_type,
                          processor_llm, return_explanation):
    """Score plausibility and relevance (0-10 each); both must be >= 8 to pass."""
    eval_prompt = f"""
You are a market research evaluator. Given the following:
- User Profile: {user_profile_str}
- Fast Facts: {fast_facts_str}
- Interview Transcript: {interview_transcript_text}
- Respondent Type: {respondent_type}
- Question: {question}
- Answer: {answer}

Rate the answer on a scale of 0–10 for:

1. **Plausibility** – Does the response make sense given what is known about the respondent?
- Consider the respondent’s background, demographics, stated preferences, life stage, interests, and prior responses.
- Is the answer **internally consistent** and **realistic** for someone like this respondent?
- Does it feel like something a person in their position would genuinely say or experience?
- Avoid penalising for style — focus purely on whether the answer is believable and fits the persona.
- A low plausibility score indicates the answer seems fabricated, out of character, contradictory, or implausible for this individual or group.

2. **Relevance** – Does the answer directly and fully address the specific question asked?
- Check whether the response clearly **answers the intent of the question** without deflection or vagueness.
- Consider whether it provides a complete and meaningful response — not just a surface-level or partial reply.
- Does the answer stay **on-topic** and reflect the subject matter or framing of the original prompt?
- A low relevance score means the answer is off-topic, evasive, only loosely related, or ignores key elements of the question.

Ignore tone, emotional expression, writing style, grammar, or British/American English differences.
Focus **strictly** on the **content quality**, **truthfulness**, and **alignment with the question and user profile**.

Output strictly in this format:
Plausibility Rating: <0-10>
Relevance Rating: <0-10>
If either rating is less than 8, provide a short reason for each below:
Plausibility Reason:
Relevance Reason:
"""
    eval_text = processor_llm.invoke(eval_prompt).content.strip()

    plausibility = relevance = None
    plaus_reason = relev_reason = None
    for line in eval_text.split("\n"):
        lower = line.lower()
        if lower.startswith("plausibility rating:"):
            plausibility = _parse_rating(line)
        elif lower.startswith("relevance rating:"):
            relevance = _parse_rating(line)
        elif lower.startswith("plausibility reason:"):
            plaus_reason = line.split(":", 1)[1].strip()
        elif lower.startswith("relevance reason:"):
            relev_reason = line.split(":", 1)[1].strip()

    logging.info("Exploratory evaluation: plausibility=%s, relevance=%s", plausibility, relevance)

    if plausibility is not None and relevance is not None:
        valid = plausibility >= 8.0 and relevance >= 8.0
        if return_explanation:
            feedback = []
            if plausibility < 8.0 and plaus_reason:
                feedback.append(f"Plausibility: {plaus_reason}")
            if relevance < 8.0 and relev_reason:
                feedback.append(f"Relevance: {relev_reason}")
            return valid, "; ".join(feedback) if feedback else None
        return valid

    if return_explanation:
        return False, "Could not parse plausibility/relevance ratings."
    return False


def _evaluate_fact_based(question, answer, user_profile_str, fast_facts_str,
                         interview_transcript_text, respondent_type,
                         processor_llm, return_explanation):
    """Score factual accuracy (0-10) against profile/transcript; >= 8 passes."""
    eval_prompt = f"""
You are a market research evaluator. Given the following:
- User Profile: {user_profile_str}
- Fast Facts: {fast_facts_str}
- Interview Transcript: {interview_transcript_text}
- Respondent Type: {respondent_type}
- Question: {question}
- Answer: {answer}

Rate the answer on a scale of 0–10 for:

1. **Accuracy** – Does the content align with the user’s facts or transcript, without fabrications?

Ignore tone, phrasing, or style. Focus only on factual correctness.

Output strictly in this format:
Accuracy Rating: <0-10>
If the rating is less than 8, provide a short reason below:
Accuracy Reason:
"""
    eval_text = processor_llm.invoke(eval_prompt).content.strip()

    accuracy = None
    accuracy_reason = None
    for line in eval_text.split("\n"):
        lower = line.lower()
        if lower.startswith("accuracy rating:"):
            accuracy = _parse_rating(line)
        elif lower.startswith("accuracy reason:"):
            accuracy_reason = line.split(":", 1)[1].strip()

    logging.info("Fact-based evaluation: accuracy=%s", accuracy)

    if accuracy is not None:
        valid = accuracy >= 8.0
        if return_explanation:
            if not valid and accuracy_reason:
                return False, accuracy_reason
            return valid, None
        return valid

    if return_explanation:
        return False, "Could not parse accuracy rating."
    return False


def validate_response(question, answer, user_profile_str, fast_facts_str,
                      interview_transcript_text, respondent_type,
                      ai_evaluator_agent, processor_llm, return_explanation=False):
    """Validate an interview ``answer`` via LLM scoring.

    First classifies the question as exploratory vs. fact-based, then scores
    exploratory answers on plausibility + relevance and fact-based answers on
    accuracy; every score must be >= 8/10 to pass.

    Args:
        question: The interviewer question being answered.
        answer: Candidate response text to validate.
        user_profile_str: Serialized respondent profile fed into the prompts.
        fast_facts_str: Serialized quick facts about the respondent.
        interview_transcript_text: Transcript context for consistency checks.
        respondent_type: e.g. "INDIVIDUAL" or "FOCUS GROUP".
        ai_evaluator_agent: Unused here; kept for caller compatibility.
        processor_llm: Object with ``invoke(prompt)`` returning ``.content``.
        return_explanation: When True, return ``(bool, reason_or_None)``.

    Returns:
        bool, or ``(bool, str | None)`` when ``return_explanation`` is True.
    """
    evaluation_mode = _determine_evaluation_mode(question, respondent_type, processor_llm)
    logging.info("LLM determined evaluation mode: %s", evaluation_mode)

    if evaluation_mode == "exploratory":
        return _evaluate_exploratory(
            question, answer, user_profile_str, fast_facts_str,
            interview_transcript_text, respondent_type,
            processor_llm, return_explanation,
        )

    logging.info("Performing fact-based evaluation (accuracy)...")
    return _evaluate_fact_based(
        question, answer, user_profile_str, fast_facts_str,
        interview_transcript_text, respondent_type,
        processor_llm, return_explanation,
    )