import logging
import re

import gradio as gr
from langchain_groq import ChatGroq

from RespondentAgent import *  # NOTE(review): wildcard import — presumably supplies names used elsewhere in the app; verify

# Questions containing any of these phrases are treated as factual and skip
# the strict style-enforcement pipeline entirely.
_FACTUAL_KEYWORDS = [
    "name", "age", "where are you from", "where do you live", "occupation",
    "birthplace", "what do you do", "how old", "which city", "which country",
]

# First (possibly signed / fractional) number in a string, so LLM outputs such
# as "8", "9.5", or "9 (solid answer)" all parse instead of raising ValueError.
_NUMBER_RE = re.compile(r"-?\d+(?:\.\d+)?")


def _parse_rating(line):
    """Return the first number found after the ':' in a 'Label: <n>' line, or None."""
    _, _, tail = line.partition(":")
    match = _NUMBER_RE.search(tail)
    return float(match.group()) if match else None


def matches_user_speaking_style(answer, processor_llm, user_profile, agent_question,
                                respondent_type="INDIVIDUAL", return_explanation=False):
    """Check whether ``answer`` matches the respondent's expected speaking style.

    Runs up to three LLM-backed checks:
      1. Factual questions (name, age, location, ...) skip style enforcement.
      2. Perspective check: collective voice ("we"/"our") for "FOCUS GROUP",
         first-person voice ("I"/"my") otherwise.
      3. Alignment with the user's Communication profile
         (Style / Tone / Length / Topics).

    Args:
        answer: Candidate response text to evaluate.
        processor_llm: Object with an ``invoke(prompt)`` method returning a
            message exposing a ``.content`` string (LangChain-style chat model).
        user_profile: Profile object exposing ``get_field(section, key)``.
        agent_question: The interviewer question that ``answer`` responds to.
        respondent_type: "INDIVIDUAL" (default) or "FOCUS GROUP".
        return_explanation: When True, return ``(bool, explanation_or_None)``
            instead of a bare bool.

    Returns:
        bool, or ``(bool, str | None)`` when ``return_explanation`` is True.
        Any exception is caught and reported as a failed check.
    """
    logging.info("[Style Match Check] Entry")
    try:
        # --- Step 1: skip strict style enforcement for factual questions ---
        lower_q = agent_question.strip().lower()
        if any(kw in lower_q for kw in _FACTUAL_KEYWORDS):
            logging.info("[Style Match Check] Question is factual — skipping strict style enforcement")
            return (True, None) if return_explanation else True

        # --- Step 2: first-person or collective pronoun check ---
        logging.info(
            "[Style Match Check] Performing %s pronoun check",
            "collective" if respondent_type == "FOCUS GROUP" else "first-person",
        )
        if respondent_type == "FOCUS GROUP":
            pronoun_prompt = f"""
You are an expert in writing style analysis.

Determine whether the following response is appropriate for a **focus group**, which must:
- Use collective language ("we", "our", "us", "some of us", "most participants")
- Avoid any first-person singular language ("I", "me", "my", etc.)
- Speak as a group, not as an individual

Check the response below and answer in the following format:
Focus Group Style: Yes
or
Focus Group Style: No
Reason:

---

### Question:
{agent_question}

### Response:
{answer}
"""
            result = processor_llm.invoke(pronoun_prompt).content.strip().lower()
            if "focus group style: no" in result:
                # Surface the model's own reason when it provided one.
                if "reason:" in result:
                    explanation = result.split("reason:", 1)[-1].strip().capitalize()
                else:
                    explanation = "The response does not follow focus group voice."
                logging.warning("[Style Match Check] Failed group tone: %s", explanation)
                return (False, explanation) if return_explanation else False
        else:
            # INDIVIDUAL — validate first-person perspective.
            fp_prompt = f"""
You are an expert in writing style analysis.

Determine whether the following response uses a personal **first-person** tone, appropriate for an individual.
- Look for use of "I", "me", "my", "mine", or implied personal ownership.
- Skip judgment on content quality or grammar — just the perspective.

Respond using this format:
First Person: Yes
or
First Person: No
Reason:

---

### Question:
{agent_question}

### Response:
{answer}
"""
            fp_result = processor_llm.invoke(fp_prompt).content.strip().lower()
            if "first person: no" in fp_result:
                if "reason:" in fp_result:
                    explanation = fp_result.split("reason:", 1)[-1].strip().capitalize()
                else:
                    explanation = "The answer is not in first person."
                logging.warning("[Style Match Check] Failed first-person test: %s", explanation)
                return (False, explanation) if return_explanation else False

        # --- Step 3: communication style match against the stored profile ---
        style = user_profile.get_field("Communication", "Style")
        tone = user_profile.get_field("Communication", "Tone")
        length = user_profile.get_field("Communication", "Length")
        topics = user_profile.get_field("Communication", "Topics")

        style_check_prompt = f"""
You are a communication coach and writing style analyst.

Evaluate how well the following response aligns with the given communication profile.

---

### Response:
{answer}

### Communication Profile:
- Style: {style}
- Tone: {tone}
- Preferred Length: {length}
- Common Topics: {topics}

---

### Instructions:
Assess whether the response reflects the user's typical communication style.
Respond with only one of:
- Style Match: Yes
- Style Match: Mostly
- Style Match: No
"""
        style_result = processor_llm.invoke(style_check_prompt).content.strip().lower()

        if "style match: yes" in style_result or "style match: mostly" in style_result:
            return (True, None) if return_explanation else True

        if "style match: no" in style_result:
            # Ask the model for a short mismatch explanation for logs/caller.
            explanation_prompt = f"""
You are a communication coach. The following response was judged as **not matching** the profile. Briefly explain why.

---

Response:
{answer}

Style: {style}
Tone: {tone}
Length: {length}
Topics: {topics}
"""
            explanation = processor_llm.invoke(explanation_prompt).content.strip()
            logging.warning("[Style Match Check] Style mismatch explanation: %s", explanation)
            return (False, explanation) if return_explanation else False

        # Fallback: an unparseable verdict counts as a failure.
        logging.warning("[Style Match Check] Unclear result format: %s", style_result)
        return (False, f"Unexpected format: {style_result}") if return_explanation else False

    except Exception as e:
        # Defensive boundary: any LLM/profile error is reported as a failed
        # check rather than crashing the interview loop.
        logging.error("[Style Match Check] Exception: %s", e)
        return (False, str(e)) if return_explanation else False


def _determine_evaluation_mode(question, respondent_type, processor_llm):
    """Ask the LLM whether ``question`` is exploratory or fact-based.

    Returns "factbased" or "exploratory" (the default when the verdict line
    is missing or unparseable).
    """
    llm_mode_prompt = f"""
You are an expert in market research interview analysis. Given the following question, determine if it is:
- Exploratory: subjective, open-ended, opinion-based, or reflective (e.g., feelings, motivations, preferences, aspirations, values, beliefs, etc.)
- Fact-based: objective, factual, or directly verifiable from the respondent's profile or transcript (e.g., age, location, occupation, education, etc.)

Respondent Type: {respondent_type}
Question: {question}

Output strictly in this format:
Evaluation Mode:
"""
    output = processor_llm.invoke(llm_mode_prompt).content.strip()
    for line in output.split("\n"):
        if line.lower().startswith("evaluation mode:"):
            val = line.split(":", 1)[1].strip().lower()
            # Take the first explicit verdict; don't let later stray lines
            # overwrite it (the original kept scanning and kept the last).
            return "factbased" if "fact" in val else "exploratory"
    return "exploratory"


def _evaluate_exploratory(question, answer, user_profile_str, fast_facts_str,
                          interview_transcript_text, respondent_type,
                          processor_llm, return_explanation):
    """Score plausibility and relevance (0-10 each); both must be >= 8 to pass."""
    eval_prompt = f"""
You are a market research evaluator. Given the following:
- User Profile: {user_profile_str}
- Fast Facts: {fast_facts_str}
- Interview Transcript: {interview_transcript_text}
- Respondent Type: {respondent_type}
- Question: {question}
- Answer: {answer}

Rate the answer on a scale of 0–10 for:

1. **Plausibility** – Does the response make sense given what is known about the respondent?
- Consider the respondent’s background, demographics, stated preferences, life stage, interests, and prior responses.
- Is the answer **internally consistent** and **realistic** for someone like this respondent?
- Does it feel like something a person in their position would genuinely say or experience?
- Avoid penalising for style — focus purely on whether the answer is believable and fits the persona.
- A low plausibility score indicates the answer seems fabricated, out of character, contradictory, or implausible for this individual or group.

2. **Relevance** – Does the answer directly and fully address the specific question asked?
- Check whether the response clearly **answers the intent of the question** without deflection or vagueness.
- Consider whether it provides a complete and meaningful response — not just a surface-level or partial reply.
- Does the answer stay **on-topic** and reflect the subject matter or framing of the original prompt?
- A low relevance score means the answer is off-topic, evasive, only loosely related, or ignores key elements of the question.

Ignore tone, emotional expression, writing style, grammar, or British/American English differences.
Focus **strictly** on the **content quality**, **truthfulness**, and **alignment with the question and user profile**.

Output strictly in this format:
Plausibility Rating: <0-10>
Relevance Rating: <0-10>
If either rating is less than 8, provide a short reason for each below:
Plausibility Reason:
Relevance Reason:
"""
    eval_text = processor_llm.invoke(eval_prompt).content.strip()

    plausibility = relevance = None
    plaus_reason = relev_reason = None
    for line in eval_text.split("\n"):
        lower = line.lower()
        if lower.startswith("plausibility rating:"):
            plausibility = _parse_rating(line)
        elif lower.startswith("relevance rating:"):
            relevance = _parse_rating(line)
        elif lower.startswith("plausibility reason:"):
            plaus_reason = line.split(":", 1)[1].strip()
        elif lower.startswith("relevance reason:"):
            relev_reason = line.split(":", 1)[1].strip()

    logging.info("Exploratory evaluation: plausibility=%s, relevance=%s", plausibility, relevance)

    if plausibility is not None and relevance is not None:
        valid = plausibility >= 8.0 and relevance >= 8.0
        if return_explanation:
            feedback = []
            if plausibility < 8.0 and plaus_reason:
                feedback.append(f"Plausibility: {plaus_reason}")
            if relevance < 8.0 and relev_reason:
                feedback.append(f"Relevance: {relev_reason}")
            return valid, "; ".join(feedback) if feedback else None
        return valid

    if return_explanation:
        return False, "Could not parse plausibility/relevance ratings."
    return False


def _evaluate_fact_based(question, answer, user_profile_str, fast_facts_str,
                         interview_transcript_text, respondent_type,
                         processor_llm, return_explanation):
    """Score factual accuracy (0-10) against profile/transcript; >= 8 passes."""
    eval_prompt = f"""
You are a market research evaluator. Given the following:
- User Profile: {user_profile_str}
- Fast Facts: {fast_facts_str}
- Interview Transcript: {interview_transcript_text}
- Respondent Type: {respondent_type}
- Question: {question}
- Answer: {answer}

Rate the answer on a scale of 0–10 for:

1. **Accuracy** – Does the content align with the user’s facts or transcript, without fabrications?

Ignore tone, phrasing, or style. Focus only on factual correctness.

Output strictly in this format:
Accuracy Rating: <0-10>
If the rating is less than 8, provide a short reason below:
Accuracy Reason:
"""
    eval_text = processor_llm.invoke(eval_prompt).content.strip()

    accuracy = None
    accuracy_reason = None
    for line in eval_text.split("\n"):
        lower = line.lower()
        if lower.startswith("accuracy rating:"):
            accuracy = _parse_rating(line)
        elif lower.startswith("accuracy reason:"):
            accuracy_reason = line.split(":", 1)[1].strip()

    logging.info("Fact-based evaluation: accuracy=%s", accuracy)

    if accuracy is not None:
        valid = accuracy >= 8.0
        if return_explanation:
            if not valid and accuracy_reason:
                return False, accuracy_reason
            return valid, None
        return valid

    if return_explanation:
        return False, "Could not parse accuracy rating."
    return False


def validate_response(question, answer, user_profile_str, fast_facts_str,
                      interview_transcript_text, respondent_type,
                      ai_evaluator_agent, processor_llm, return_explanation=False):
    """Validate an interview ``answer`` via LLM scoring.

    First classifies the question as exploratory vs. fact-based, then scores
    exploratory answers on plausibility + relevance and fact-based answers on
    accuracy; every score must be >= 8/10 to pass.

    Args:
        question: The interviewer question being answered.
        answer: Candidate response text to validate.
        user_profile_str: Serialized respondent profile fed into the prompts.
        fast_facts_str: Serialized quick facts about the respondent.
        interview_transcript_text: Transcript context for consistency checks.
        respondent_type: e.g. "INDIVIDUAL" or "FOCUS GROUP".
        ai_evaluator_agent: Unused here; kept for caller compatibility.
        processor_llm: Object with ``invoke(prompt)`` returning ``.content``.
        return_explanation: When True, return ``(bool, reason_or_None)``.

    Returns:
        bool, or ``(bool, str | None)`` when ``return_explanation`` is True.
    """
    evaluation_mode = _determine_evaluation_mode(question, respondent_type, processor_llm)
    logging.info("LLM determined evaluation mode: %s", evaluation_mode)

    if evaluation_mode == "exploratory":
        return _evaluate_exploratory(
            question, answer, user_profile_str, fast_facts_str,
            interview_transcript_text, respondent_type,
            processor_llm, return_explanation,
        )

    logging.info("Performing fact-based evaluation (accuracy)...")
    return _evaluate_fact_based(
        question, answer, user_profile_str, fast_facts_str,
        interview_transcript_text, respondent_type,
        processor_llm, return_explanation,
    )