Spaces:
Build error
Build error
| import logging | |
| import gradio as gr | |
| import re | |
| from RespondentAgent import * | |
| from langchain_groq import ChatGroq | |
def _is_factual_question(agent_question):
    """Return True when the question asks for simple profile facts (name, age, ...)."""
    factual_keywords = [
        "name", "age", "where are you from", "where do you live", "occupation",
        "birthplace", "what do you do", "how old", "which city", "which country"
    ]
    lower_q = agent_question.strip().lower()
    return any(kw in lower_q for kw in factual_keywords)


def _check_voice(answer, processor_llm, agent_question, respondent_type):
    """LLM check of narrative voice: collective for FOCUS GROUP, first-person otherwise.

    Returns ``(ok, explanation)`` — *explanation* is None when the check passes,
    otherwise a short reason extracted from the LLM's "Reason:" line (or a
    branch-specific fallback message when no reason was given).
    """
    logging.info(f"[Style Match Check] Performing {'collective' if respondent_type == 'FOCUS GROUP' else 'first-person'} pronoun check")
    if respondent_type == "FOCUS GROUP":
        prompt = f"""
You are an expert in writing style analysis.
Determine whether the following response is appropriate for a **focus group**, which must:
- Use collective language ("we", "our", "us", "some of us", "most participants")
- Avoid any first-person singular language ("I", "me", "my", etc.)
- Speak as a group, not as an individual
Check the response below and answer in the following format:
Focus Group Style: Yes
or
Focus Group Style: No
Reason: <short reason>
---
### Question:
{agent_question}
### Response:
{answer}
"""
        failure_marker = "focus group style: no"
        fallback = "The response does not follow focus group voice."
        fail_label = "Failed group tone"
    else:
        # INDIVIDUAL — first-person pronoun validation.
        prompt = f"""
You are an expert in writing style analysis.
Determine whether the following response uses a personal **first-person** tone, appropriate for an individual.
- Look for use of "I", "me", "my", "mine", or implied personal ownership.
- Skip judgment on content quality or grammar — just the perspective.
Respond using this format:
First Person: Yes
or
First Person: No
Reason: <short explanation>
---
### Question:
{agent_question}
### Response:
{answer}
"""
        failure_marker = "first person: no"
        fallback = "The answer is not in first person."
        fail_label = "Failed first-person test"
    result = processor_llm.invoke(prompt).content.strip().lower()
    if failure_marker in result:
        explanation = result.split("reason:", 1)[-1].strip().capitalize() if "reason:" in result else fallback
        logging.warning(f"[Style Match Check] {fail_label}: {explanation}")
        return False, explanation
    return True, None


def matches_user_speaking_style(answer, processor_llm, user_profile, agent_question, respondent_type="INDIVIDUAL", return_explanation=False):
    """Validate that *answer* matches the respondent's expected speaking style.

    Three-stage pipeline:
      1. Factual questions (name, age, location, ...) skip style enforcement.
      2. Narrative-voice check via LLM: collective voice for "FOCUS GROUP",
         first-person voice for "INDIVIDUAL".
      3. Communication-profile check via LLM against the user's
         Style / Tone / Length / Topics fields.

    Args:
        answer: Candidate answer text to validate.
        processor_llm: LLM client exposing ``invoke(prompt)`` that returns an
            object with a string ``.content`` (LangChain-style chat model).
        user_profile: Object exposing ``get_field(section, key)``; only the
            "Communication" section is read.
        agent_question: The interviewer question being answered.
        respondent_type: "INDIVIDUAL" (default) or "FOCUS GROUP".
        return_explanation: When True, return ``(ok, explanation-or-None)``
            instead of a bare bool.

    Returns:
        bool, or ``(bool, str | None)`` when *return_explanation* is True.
        All exceptions are caught and reported as a failed check.
    """
    logging.info("[Style Match Check] Entry")
    try:
        # --- Step 1: Skip style check for factual questions ---
        if _is_factual_question(agent_question):
            logging.info("[Style Match Check] Question is factual — skipping strict style enforcement")
            return (True, None) if return_explanation else True

        # --- Step 2: First-person or collective pronoun check ---
        ok, explanation = _check_voice(answer, processor_llm, agent_question, respondent_type)
        if not ok:
            return (False, explanation) if return_explanation else False

        # --- Step 3: Communication style match ---
        style = user_profile.get_field("Communication", "Style")
        tone = user_profile.get_field("Communication", "Tone")
        length = user_profile.get_field("Communication", "Length")
        topics = user_profile.get_field("Communication", "Topics")
        style_check_prompt = f"""
You are a communication coach and writing style analyst.
Evaluate how well the following response aligns with the given communication profile.
---
### Response:
{answer}
### Communication Profile:
- Style: {style}
- Tone: {tone}
- Preferred Length: {length}
- Common Topics: {topics}
---
### Instructions:
Assess whether the response reflects the user's typical communication style.
Respond with only one of:
- Style Match: Yes
- Style Match: Mostly
- Style Match: No
"""
        style_result = processor_llm.invoke(style_check_prompt).content.strip().lower()
        if "style match: yes" in style_result or "style match: mostly" in style_result:
            return (True, None) if return_explanation else True
        if "style match: no" in style_result:
            # NOTE: the explanation call is made (and logged) even when the
            # caller did not request an explanation — preserves prior behaviour.
            explanation_prompt = f"""
You are a communication coach.
The following response was judged as **not matching** the profile. Briefly explain why.
---
Response: {answer}
Style: {style}
Tone: {tone}
Length: {length}
Topics: {topics}
"""
            explanation = processor_llm.invoke(explanation_prompt).content.strip()
            logging.warning(f"[Style Match Check] Style mismatch explanation: {explanation}")
            return (False, explanation) if return_explanation else False

        # Fallback: the model ignored the required output format.
        logging.warning(f"[Style Match Check] Unclear result format: {style_result}")
        return (False, f"Unexpected format: {style_result}") if return_explanation else False
    except Exception as e:
        # Best-effort guard: any failure (LLM error, missing profile field)
        # is reported as "does not match" rather than crashing the interview.
        logging.error(f"[Style Match Check] Exception: {e}")
        return (False, str(e)) if return_explanation else False
def _classify_question_mode(question, respondent_type, processor_llm):
    """Ask the LLM to label *question* as exploratory or fact-based.

    Returns "factbased" or "exploratory"; defaults to "exploratory" when the
    LLM output contains no parseable "Evaluation Mode:" line.
    """
    llm_mode_prompt = f"""
You are an expert in market research interview analysis. Given the following question, determine if it is:
- Exploratory: subjective, open-ended, opinion-based, or reflective (e.g., feelings, motivations, preferences, aspirations, values, beliefs, etc.)
- Fact-based: objective, factual, or directly verifiable from the respondent's profile or transcript (e.g., age, location, occupation, education, etc.)
Respondent Type: {respondent_type}
Question: {question}
Output strictly in this format:
Evaluation Mode: <Exploratory or Fact-based>
"""
    output = processor_llm.invoke(llm_mode_prompt).content.strip()
    mode = "exploratory"
    for line in output.split("\n"):
        if line.lower().startswith("evaluation mode:"):
            value = line.split(":", 1)[1].strip().lower()
            mode = "factbased" if "fact" in value else "exploratory"
    return mode


def _rating_from(line, label):
    """Parse the float after the colon in a ``<Label> Rating: <n>`` line.

    Returns None (after logging) when the value is not a valid number.
    """
    try:
        return float(line.split(":", 1)[1].strip())
    except Exception as e:
        logging.error(f"Error parsing {label} rating: {e}")
        return None


def _evaluate_exploratory(question, answer, user_profile_str, fast_facts_str, interview_transcript_text, respondent_type, processor_llm, return_explanation):
    """Score an exploratory answer on plausibility and relevance (0-10 each).

    Both scores must be >= 8.0 for the answer to be considered valid.
    """
    eval_prompt = f"""
You are a market research evaluator. Given the following:
- User Profile: {user_profile_str}
- Fast Facts: {fast_facts_str}
- Interview Transcript: {interview_transcript_text}
- Respondent Type: {respondent_type}
- Question: {question}
- Answer: {answer}
Rate the answer on a scale of 0–10 for:
1. **Plausibility** – Does the response make sense given what is known about the respondent?
- Consider the respondent’s background, demographics, stated preferences, life stage, interests, and prior responses.
- Is the answer **internally consistent** and **realistic** for someone like this respondent?
- Does it feel like something a person in their position would genuinely say or experience?
- Avoid penalising for style — focus purely on whether the answer is believable and fits the persona.
- A low plausibility score indicates the answer seems fabricated, out of character, contradictory, or implausible for this individual or group.
2. **Relevance** – Does the answer directly and fully address the specific question asked?
- Check whether the response clearly **answers the intent of the question** without deflection or vagueness.
- Consider whether it provides a complete and meaningful response — not just a surface-level or partial reply.
- Does the answer stay **on-topic** and reflect the subject matter or framing of the original prompt?
- A low relevance score means the answer is off-topic, evasive, only loosely related, or ignores key elements of the question.
Ignore tone, emotional expression, writing style, grammar, or British/American English differences.
Focus **strictly** on the **content quality**, **truthfulness**, and **alignment with the question and user profile**.
Output strictly in this format:
Plausibility Rating: <0-10>
Relevance Rating: <0-10>
If either rating is less than 8, provide a short reason for each below:
Plausibility Reason: <reason>
Relevance Reason: <reason>
"""
    eval_text = processor_llm.invoke(eval_prompt).content.strip()
    plausibility = None
    relevance = None
    plaus_reason = None
    relev_reason = None
    for line in eval_text.split("\n"):
        lowered = line.lower()
        if lowered.startswith("plausibility rating:"):
            plausibility = _rating_from(line, "plausibility")
        elif lowered.startswith("relevance rating:"):
            relevance = _rating_from(line, "relevance")
        elif lowered.startswith("plausibility reason:"):
            plaus_reason = line.split(":", 1)[1].strip()
        elif lowered.startswith("relevance reason:"):
            relev_reason = line.split(":", 1)[1].strip()
    logging.info(f"Exploratory evaluation: plausibility={plausibility}, relevance={relevance}")
    if plausibility is None or relevance is None:
        return (False, "Could not parse plausibility/relevance ratings.") if return_explanation else False
    valid = plausibility >= 8.0 and relevance >= 8.0
    if not return_explanation:
        return valid
    feedback = []
    if plausibility < 8.0 and plaus_reason:
        feedback.append(f"Plausibility: {plaus_reason}")
    if relevance < 8.0 and relev_reason:
        feedback.append(f"Relevance: {relev_reason}")
    return valid, "; ".join(feedback) if feedback else None


def _evaluate_factual(question, answer, user_profile_str, fast_facts_str, interview_transcript_text, respondent_type, processor_llm, return_explanation):
    """Score a fact-based answer on accuracy (0-10); valid when >= 8.0."""
    logging.info("Performing fact-based evaluation (accuracy)...")
    eval_prompt = f"""
You are a market research evaluator. Given the following:
- User Profile: {user_profile_str}
- Fast Facts: {fast_facts_str}
- Interview Transcript: {interview_transcript_text}
- Respondent Type: {respondent_type}
- Question: {question}
- Answer: {answer}
Rate the answer on a scale of 0–10 for:
1. **Accuracy** – Does the content align with the user’s facts or transcript, without fabrications?
Ignore tone, phrasing, or style. Focus only on factual correctness.
Output strictly in this format:
Accuracy Rating: <0-10>
If the rating is less than 8, provide a short reason below:
Accuracy Reason: <reason>
"""
    eval_text = processor_llm.invoke(eval_prompt).content.strip()
    accuracy = None
    accuracy_reason = None
    for line in eval_text.split("\n"):
        lowered = line.lower()
        if lowered.startswith("accuracy rating:"):
            accuracy = _rating_from(line, "accuracy")
        elif lowered.startswith("accuracy reason:"):
            accuracy_reason = line.split(":", 1)[1].strip()
    logging.info(f"Fact-based evaluation: accuracy={accuracy}")
    if accuracy is None:
        return (False, "Could not parse accuracy rating.") if return_explanation else False
    valid = accuracy >= 8.0
    if return_explanation:
        if not valid and accuracy_reason:
            return False, accuracy_reason
        return valid, None
    return valid


def validate_response(question, answer, user_profile_str, fast_facts_str, interview_transcript_text, respondent_type, ai_evaluator_agent, processor_llm, return_explanation=False):
    """Validate an interview answer with LLM-based scoring.

    The question is first classified as exploratory or fact-based, then the
    answer is scored accordingly: plausibility + relevance for exploratory
    questions, accuracy for fact-based ones. Each score must be >= 8.0.

    Args:
        question: The interviewer question.
        answer: The respondent's answer to evaluate.
        user_profile_str: Serialized user profile, included in the prompt.
        fast_facts_str: Serialized fast facts, included in the prompt.
        interview_transcript_text: Interview transcript, included in the prompt.
        respondent_type: e.g. "INDIVIDUAL" or "FOCUS GROUP".
        ai_evaluator_agent: Unused; kept for interface compatibility with callers.
        processor_llm: LLM client exposing ``invoke(prompt)`` returning an
            object with a string ``.content``.
        return_explanation: When True, return ``(valid, reason-or-None)``
            instead of a bare bool.

    Returns:
        bool, or ``(bool, str | None)`` when *return_explanation* is True.

    Note:
        Unlike the style checker, LLM errors are NOT caught here and will
        propagate to the caller.
    """
    evaluation_mode = _classify_question_mode(question, respondent_type, processor_llm)
    logging.info(f"LLM determined evaluation mode: {evaluation_mode}")
    if evaluation_mode == "exploratory":
        return _evaluate_exploratory(question, answer, user_profile_str, fast_facts_str, interview_transcript_text, respondent_type, processor_llm, return_explanation)
    return _evaluate_factual(question, answer, user_profile_str, fast_facts_str, interview_transcript_text, respondent_type, processor_llm, return_explanation)