"""Interview-question pipeline: parse which respondent(s) a question targets,
validate the topic is in scope, generate a neutral answer via a CrewAI agent,
then restyle it to the respondent's communication profile.

NOTE(review): `gradio` and `ChatGroq` are imported but not referenced in this
module; presumably used by callers importing from here — left in place.
"""

import gradio as gr
import logging
import re
import time

from RespondentAgent import *
from langchain_groq import ChatGroq
from ResponseValidation import *

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Global respondent mode; also read inside tailor_answer_to_profile's prompt.
respondent_type = "INDIVIDUAL USER"


def parse_question_with_llm(question, respondent_names, processor_llm):
    """
    Uses the processor LLM to extract the specific agents being addressed and
    their respective questions. Supports compound requests.

    Args:
        question: Raw moderator input, possibly addressing several respondents.
        respondent_names: String listing the available respondent names.
        processor_llm: LLM client exposing `.invoke(prompt)` returning an
            object with a `.content` attribute (e.g. a LangChain chat model).

    Returns:
        dict mapping respondent name -> extracted question text. Special
        names: "General" (nobody addressed explicitly) and "All" (everyone).
        Empty dict on LLM failure or malformed output.
    """
    logging.info("Parsing Question With llm")
    logging.info("[parse_question_with_llm] Question Entry")
    logging.debug(f"[parse_question_with_llm] Input question: {question}")
    logging.debug(f"[parse_question_with_llm] Available respondent names: {respondent_names}")
    start_time = time.time()

    prompt = f"""
    You are an expert in market research interview analysis. Your task is to **identify respondents** mentioned in the question and **extract the exact question** posed to them.

    ### User Input:
    {question}

    ### Instructions:
    1. Identify **each respondent being addressed**. The respondents available are {respondent_names}. If these names are mistyped, then ensure that you match the names to the ones available.
    2. Extract the **exact question** directed to each respondent with the following conditions:
        - Remove the respondent's name(s), whether at the beginning, middle, or end of the question.
        - Also remove any directly surrounding commas or punctuation attached to the name.
        - Keep all other wording, punctuation, and sentence structure exactly as in the original. Do NOT rephrase or rewrite under any circumstance.
    3. If no respondent is explicitly addressed, return "General" as the respondent name.
    4. If the question is posed to all respondents, return "All" as the respondent name.
    5. Rewrite the question in **British English** if necessary.
        - Do not rephrase beyond British spelling or grammar.
        - Do not add, remove, or change the meaning of the question.
        - Where there are regional variations (e.g. 'licence' vs 'license', 'programme' vs 'program', 'aeroplane' vs 'airplane'), always default to the standard British form.
        - Examples:
            - **Correct (British):** organised, prioritise, minimise, realise, behaviour, centre, defence, travelling, practise (verb), licence (noun), programme, aeroplane.
            - **Incorrect (American):** organized, prioritize, minimize, realize, behavior, center, defense, traveling, practice (verb and noun), license (noun), program, airplane.
    6. Ensure that you follow the **Formatting Rules** exactly. THIS IS EXTREMELY IMPORTANT.

    ### Examples:
    - "Sourav, do you agree with this topic?" → "Do you agree with this topic?"
    - "What do you think about this topic, Divya?" → "What do you think about this topic?"
    - "Do you believe, Rahul, that this is correct?" → "Do you believe that this is correct?"
    - "What do you think, Divya, about this topic?" → "What do you think about this topic?"
    - "Do you, Rahul, agree with this statement?" → "Do you agree with this statement?"
    - "Are you, Sourav, going to do this?" → "Are you going to do this?"
    - "What is your favorite color, Meena?" → "What is your favourite colour?"
    - "Divya, what did you learn from this program?" → "What did you learn from this programme?"
    - "How do you stay organized, Rahul?" → "How do you stay organised?"
    - "Meena, how do you balance work and traveling?" → "How do you balance work and travelling?"

    ### **Formatting Rules**:
    For each question identified, respond using **only** the following format:
    - Respondent:
    Question:

    Only return the formatted output without explanations.
    """
    logging.debug("[parse_question_with_llm] Prompt constructed successfully.")
    logging.debug(f"[parse_question_with_llm] Prompt preview: {prompt[:500]}...")
    logging.info("Prompt constructed. Invoking LLM now...")

    try:
        response = processor_llm.invoke(prompt)
        duration = time.time() - start_time
        logging.info(f"[parse_question_with_llm] LLM call completed in {duration:.2f} seconds.")

        # Guard against empty/malformed LLM responses before touching .content.
        if not hasattr(response, "content") or not response.content:
            logging.error("[parse_question_with_llm] ERROR: LLM response is empty or malformed.")
            return {}

        chatgpt_output = response.content.strip()
        logging.info(f"[parse_question_with_llm] Raw LLM output: {chatgpt_output}")
    except Exception:
        logging.exception("[parse_question_with_llm] Exception during LLM invocation.")
        return {}

    # Begin parsing the structured response
    logging.info("[parse_question_with_llm] Parsing LLM output for respondent-question pairs.")
    parsed_questions = {}
    respondent_name = "General"
    question_text = None

    for line in chatgpt_output.split("\n"):
        if "- Respondent:" in line:
            # NOTE: .capitalize() lowercases everything after the first letter,
            # which mangles multi-word or camel-cased names ("McDonald" ->
            # "Mcdonald"). Kept as-is because downstream matching relies on it.
            respondent_name = re.sub(r"^.*Respondent:\s*", "", line).strip().capitalize()
            logging.debug(f"[parse_question_with_llm] Detected respondent: {respondent_name}")
        elif "Question:" in line:
            question_text = re.sub(r"^.*Question:\s*", "", line).strip()
            if respondent_name and question_text:
                parsed_questions[respondent_name] = question_text
                logging.info(f"[parse_question_with_llm] Parsed pair: Respondent={respondent_name}, Question={question_text}")
                # Reset so a stray second "Question:" line is not re-attributed.
                respondent_name = "General"
                question_text = None

    if not parsed_questions:
        logging.warning("[parse_question_with_llm] WARNING: No respondent-question pairs parsed.")

    logging.info(f"[parse_question_with_llm] Final parsed questions: {parsed_questions}")
    logging.info("[parse_question_with_llm] Exit")
    return parsed_questions


def validate_question_topics(parsed_questions, processor_llm):
    """
    Validates each question to ensure it's within the permitted topic scope.
    Converts question to British English spelling if valid.
    Returns 'INVALID' for any out-of-scope question.

    Args:
        parsed_questions: dict of respondent -> question (from
            parse_question_with_llm).
        processor_llm: LLM client exposing `.invoke(prompt)`.

    Returns:
        dict of respondent -> validated question, where rejected questions are
        normalised to the exact sentinel string "INVALID".
    """
    logging.info("[validate_question_topics] Validating Question Topics")
    logging.debug(f"[validate_question_topics] Input parsed_questions: {parsed_questions}")
    start_time = time.time()
    validated_questions = {}

    for respondent, question in parsed_questions.items():
        logging.info(f"[validate_question_topics] Processing respondent: {respondent}")
        logging.debug(f"[validate_question_topics] Original question: {question}")

        prompt = f"""
        You are a senior research analyst. Your job is to **validate** whether a market research question is within the allowed topic scope and convert it to **British English** spelling, grammar, and phrasing.

        ### Question:
        {question}

        ### Permitted Topics Scope:
        The chatbot is designed to explore personal views, habits, and life experiences. The following topics are allowed:
        - Demographics: Age, name, location, education, family background, life events.
        - Values & Beliefs: Family responsibility, independence, hard work, gender equality, spirituality, simplicity, mental health, traditional vs modern values.
        - Career & Aspirations: Education, career goals, entrepreneurship, financial independence, stability, ambition, and personal development.
        - Influences & Role Models: Family members, mentors, public figures, influencers.
        - Interests & Hobbies: Sports, music, fitness, cooking, creative arts, gaming, travel, entertainment content, podcasts, leisure.
        - Health & Lifestyle: Physical health, fitness, diet, skincare, self-care, mental wellbeing, lifestyle balance.
        - Social Media & Technology: Social media usage, digital content, influencer interests, technology habits.
        - Personal Relationships: Family, friends, romantic relationships, support systems, social circles.
        - Future Outlook: Career plans, financial security, personal growth, family goals, confidence building.
        - Social & Societal Issues: Gender equality, societal expectations, economic issues, tradition vs freedom, social development.
        - Lifestyle Preferences: Food preferences, fashion, routines, spending habits, religious or cultural practices.
        - Personal Growth & Development: Maturity, emotional regulation, responsibility, adaptability, self-improvement, learning mindset.

        ### Validation Instructions:
        1. ✅ **Accept the question** if:
            - It fits naturally into a friendly, personal conversation.
            - It relates to one of the permitted topic areas *OR* is a simple, personal background question (e.g. name, where you live, age).
            - It can be answered from a **personal lifestyle, identity, or values-based** perspective — including light or introductory prompts.
            - It helps the chatbot get to know the respondent (even if it's informal or simple).
        2. ❌ **Reject the question** by returning **"INVALID"** if it includes:
            - Hate speech, abuse, discrimination
            - Sexual, violent, or explicit content
            - Religious proselytising or extremism
            - Political content (politicians, parties, policies, elections)
            - Technical, academic, or scientific topics unrelated to lifestyle
            - News, current events, or controversial debates
        3. ✅ **If the question is valid**, return the same question rewritten using **British English spelling and phrasing** (keep the meaning unchanged, use natural conversational style).
        4. ❌ **If invalid**, return **only**: "INVALID"

        ### Output:
        """
        logging.debug(f"[validate_question_topics] Prompt constructed for {respondent}.")
        logging.debug(f"[validate_question_topics] Prompt preview: {prompt[:500]}...")

        try:
            logging.info(f"[validate_question_topics] Invoking LLM for {respondent}")
            llm_start = time.time()
            response = processor_llm.invoke(prompt)
            llm_duration = time.time() - llm_start
            logging.info(f"[validate_question_topics] LLM call completed for {respondent} in {llm_duration:.2f} seconds")

            if not hasattr(response, "content") or not response.content:
                logging.error(f"[validate_question_topics] ERROR: Empty or malformed response from LLM for '{respondent}'")
                validated_output = "INVALID"
            else:
                validated_output = response.content.strip()
                logging.debug(f"[validate_question_topics] Raw LLM output for {respondent}: {validated_output}")
        except Exception:
            logging.exception(f"[validate_question_topics] Exception during LLM invocation for respondent '{respondent}'")
            validated_output = "INVALID"

        # FIX: the prompt shows the rejection token quoted ("INVALID"), so the
        # model may echo surrounding quotes or whitespace. Downstream code does
        # a strict equality check against "INVALID", which such variants would
        # bypass — normalise every variant to the canonical sentinel here.
        if validated_output.strip().strip("\"'").strip().upper() == "INVALID":
            validated_output = "INVALID"

        validated_questions[respondent] = validated_output
        logging.info(f"[validate_question_topics] Validation result for {respondent}: {validated_output}")

    total_duration = time.time() - start_time
    logging.info(f"[validate_question_topics] Completed validation for all questions in {total_duration:.2f} seconds.")
    logging.debug(f"[validate_question_topics] Final validated questions: {validated_questions}")
    logging.info("[validate_question_topics] Exit")
    return validated_questions


def generate_generic_answer(agent_name, agent_question, respondent_agent, respondent_type):
    """
    Generates a raw, content-only answer with no stylistic or emotional tailoring.

    Args:
        agent_name: Display name of the respondent (or focus group).
        agent_question: The validated question to answer.
        respondent_agent: CrewAI Agent instance that produces the answer.
        respondent_type: "FOCUS GROUP" or "INDIVIDUAL USER"; selects the
            persona framing and voice (collective vs first person).

    Returns:
        The agent's raw answer string, or a fallback apology on failure.
    """
    logging.info("[generate_generic_answer] Entry")
    logging.debug(f"[generate_generic_answer] Parameters: agent_name={agent_name}, agent_question={agent_question}")
    start_time = time.time()

    try:
        # --- Build task description ---
        logging.info("[generate_generic_answer] Constructing task description")
        if respondent_type == "FOCUS GROUP":
            persona_description = f"You are representing a focus group named '{agent_name}', made up of multiple individuals from the same demographic or behavioural segment."
            answer_instructions = """
        - Respond using collective voice (e.g., "we", "our group", "most of us", "some participants").
        - Do **not** speak as an individual or use "I", "my", or "me".
        - Avoid personal anecdotes—focus on shared behaviours, preferences, or perceptions.
        - If there is diversity of opinion, include it naturally (e.g., "some of us...", "others felt...").
        - If the group does not have **direct experience** with the topic, acknowledge that honestly. Do not fabricate experiences. It's okay to say "none of us have ..., but we’ve heard..." or "we can’t comment from experience."
        """
        else:
            persona_description = f"You are {agent_name}. You represent an individual user with a unique point of view."
            answer_instructions = """
        - Respond using first-person voice ("I", "my", etc.).
        - Speak from personal experience or opinion.
        - Keep it concise, clear, and neutral in tone.
        """

        task_description = f"""
        {persona_description}
        Respond to the market research interview question below.

        ---
        ### Question:
        "{agent_question}"

        ---
        ### Instructions:
        {answer_instructions}
        - Answer **only what is asked** in the question.
        - Do **not** include any introductions, conclusions, or stylistic flourishes.
        - Use **British English** spelling and grammar.
        - Keep factual/demographic answers short and direct (e.g., age, location).
        """
        logging.debug(f"[generate_generic_answer] Task description preview: {task_description[:300]}...")

        task = Task(description=task_description,
                    expected_output="A neutral, personal response to the question.",
                    agent=respondent_agent)

        logging.info("[generate_generic_answer] Starting Crew kickoff")
        kickoff_start = time.time()
        Crew(agents=[respondent_agent], tasks=[task], process=Process.sequential).kickoff()
        kickoff_duration = time.time() - kickoff_start
        logging.info(f"[generate_generic_answer] Crew kickoff completed in {kickoff_duration:.2f} seconds")

        # --- Retrieve output ---
        # CrewAI task output may expose the text as .raw; fall back to str().
        output = task.output
        if hasattr(output, "raw"):
            result = output.raw
        else:
            result = str(output)
        logging.debug(f"[generate_generic_answer] Raw output: {result}")

    except Exception:
        logging.exception("[generate_generic_answer] Exception occurred during Crew execution")
        result = "Sorry, something went wrong while generating the answer."

    total_duration = time.time() - start_time
    logging.info(f"[generate_generic_answer] Completed in {total_duration:.2f} seconds")
    logging.info("[generate_generic_answer] Exit")
    return result


def tailor_answer_to_profile(agent_name, generic_answer, agent_question, user_profile, respondent_agent, feedback=None):
    """
    Enhances the generic answer to match the respondent's communication profile
    and personality traits.

    Args:
        agent_name: Display name of the respondent.
        generic_answer: Neutral answer produced by generate_generic_answer.
        agent_question: The question being answered.
        user_profile: Profile object exposing get_field(section, key).
        respondent_agent: CrewAI Agent instance used for the rewrite.
        feedback: Optional feedback from a previous attempt to incorporate.

    Returns:
        The styled answer string, or a fallback apology on failure.
    """
    logging.info("[tailor_answer_to_profile] Entry")
    logging.debug(f"[tailor_answer_to_profile] Parameters: agent_name={agent_name}, agent_question={agent_question}")
    logging.debug(f"[tailor_answer_to_profile] generic_answer: {generic_answer}")
    logging.debug(f"[tailor_answer_to_profile] user_profile: {user_profile}")
    start_time = time.time()

    try:
        # --- Build task description ---
        logging.info("[tailor_answer_to_profile] Constructing task description")
        style = user_profile.get_field("Communication", "Style")
        tone = user_profile.get_field("Communication", "Tone")
        length = user_profile.get_field("Communication", "Length")
        topics = user_profile.get_field("Communication", "Topics")

        # NOTE: {respondent_type} below reads the module-level global, not a
        # parameter of this function.
        task_description = f"""
        You are {agent_name}. Rewrite the following answer to match your personal communication style and tone preferences — **but only if the question is subjective or personal**.

        ---
        ### Original Generic Answer:
        {generic_answer}

        ---
        ### Question:
        "{agent_question}"

        ---
        ### *Communication Profile Reference:*
        - **Style:** {style}
        - **Tone:** {tone}
        - **Length:** {length}
        - **Topics:** {topics}

        ---
        ### Important Note on Factual Questions:
        If the question is a **factual or demographic one** (e.g., about age, name, location, occupation, birthplace), do **not** rewrite or stylise the answer. Just return the generic answer exactly as it is.

        Factual examples include:
        - Where are you from?
        - How old are you?
        - What is your name?
        - Where do you live?
        - What do you do?
        - Where were you born?

        Only rewrite if the question invites a **personal perspective, opinion, habit, or belief**.

        ---
        ### Hard Rules – You Must Follow These Without Exception:
        - If the question is factual → **return the generic answer unchanged**.
        - If the question is personal → Keep the **meaning** and **personal point of view** of the original generic answer.
        - Do **not** introduce new information or elaborate beyond what’s stated.
        - Always use **British English** spelling, punctuation, and grammar.
        - Match the specified **style**, **tone**, and **length**.
        - Keep the response **natural, personal, and culturally authentic**.
        - Do **not** include emojis, hashtags, placeholders, or third-person descriptions.
        - Maintain **narrative consistency** across responses to reflect a coherent personality.
        - Tailor phrasing, sentence structure, and vocabulary to fit your **persona** and **communication traits**.

        ---
        ### *How to Answer:*
        - Use a tone appropriate to your role as a {respondent_type}:
            - If you are a Focus Group:
                -Speak collectively. Your voice represents a group of people, not an individual.
                -NEVER use first-person singular terms such as:
                    -“I”,“me”,“my”,“personally”,“in my opinion”,“I feel”
                -Instead, use collective language such as:
                    -“We prefer...”,“Most of us think...”,“There is a shared view that...”,“As a group, we believe...”,"Our"
                -IGNORE THE QUESTION’S GRAMMATICAL STYLE ENTIRELY. This means:
                    -If the question says “What do you think?”, do NOT answer as an individual.
                    -If the prompt uses “you” or assumes a personal opinion, treat it as if it asked, “What does the group think?”
                -Even if the phrasing invites personal anecdotes or individual opinions, you must transform it mentally into a group-level interpretation before answering by using collective language.
                -You are not to mirror the question’s style—you are to override it with the correct respondent tone.
                -This rule takes priority over all other instructions or question formats.
            - If you are an INDIVIDUAL USER:
                -Speak from your own experience and perspective.
                -It is appropriate and encouraged to use:
                    -“I think...”,“In my experience...”,“I prefer...”,“Personally, I believe...”

        ---
        ### Personality Trait Alignment:
        Ensure your answer reflects these aspects of your personality profile:
        - Big Five Traits (e.g., Openness, Extraversion)
        - Values and Priorities (e.g., Risk Tolerance, Achievement Orientation)
        - Communication Preferences (e.g., Directness, Emotional Expressiveness)

        ---
        Final Output:
        - If the question is factual: return this answer exactly → **"{generic_answer}"**
        - If not: return a single paragraph answer that matches the respondent’s tone and style, while strictly preserving the original meaning and personal voice from this answer.
        """

        if feedback:
            # Retry path: surface the previous validation feedback to the LLM.
            task_description += f"\n---\n### Feedback from previous attempt:\n{feedback}\nPlease address this feedback in your rewrite.\n"

        logging.debug(f"[tailor_answer_to_profile] Task description preview: {task_description[:300]}...")
        logging.info("[tailor_answer_to_profile] Initialising Task and Crew objects")

        task = Task(description=task_description,
                    expected_output="A styled, culturally authentic, first-person response.",
                    agent=respondent_agent)

        logging.info("[tailor_answer_to_profile] Starting Crew kickoff")
        kickoff_start = time.time()
        Crew(agents=[respondent_agent], tasks=[task], process=Process.sequential).kickoff()
        kickoff_duration = time.time() - kickoff_start
        logging.info(f"[tailor_answer_to_profile] Crew kickoff completed in {kickoff_duration:.2f} seconds")

        # --- Retrieve output ---
        output = task.output
        if hasattr(output, "raw"):
            result = output.raw
        else:
            result = str(output)
        logging.debug(f"[tailor_answer_to_profile] Raw output: {result}")

    except Exception:
        logging.exception("[tailor_answer_to_profile] Exception occurred during Crew execution")
        result = "Sorry, something went wrong while generating the styled answer."

    total_duration = time.time() - start_time
    logging.info(f"[tailor_answer_to_profile] Completed in {total_duration:.2f} seconds")
    logging.info("[tailor_answer_to_profile] Exit")
    return result


# --- New Validation Functions ---
def validate_generic_answer(agent_name, agent_question, generic_answer, user_profile, processor_llm):
    """
    Validates the generic answer for plausibility/relevance via
    validate_response (from ResponseValidation).

    Returns:
        (is_valid, feedback) tuple; (False, message) on any exception.
    """
    logging.info("[validate_generic_answer] Entry")
    try:
        is_valid, feedback = validate_response(
            question=agent_question,
            answer=generic_answer,
            user_profile_str=str(user_profile),
            fast_facts_str="",
            interview_transcript_text="",
            respondent_type=agent_name,
            ai_evaluator_agent=None,
            processor_llm=processor_llm,
            return_explanation=True
        )
        logging.info(f"[validate_generic_answer] Result: {is_valid}, feedback: {feedback}")
        return is_valid, feedback
    except Exception:
        logging.exception("[validate_generic_answer] Exception during validation")
        return False, "Exception during validation"


def validate_styled_answer(agent_name, agent_question, styled_answer, user_profile, processor_llm, respondent_type):
    """
    Validates whether the styled answer matches the user's speaking style
    (individual or focus group).
    Returns (True, explanation) if aligned, (False, explanation) otherwise.
    """
    logging.info("[validate_styled_answer] Entry")
    try:
        is_valid, explanation = matches_user_speaking_style(
            answer=styled_answer,
            processor_llm=processor_llm,
            user_profile=user_profile,
            agent_question=agent_question,
            respondent_type=respondent_type,
            return_explanation=True
        )
        logging.info(f"[validate_styled_answer] Validation result: {is_valid}, explanation: {explanation}")
        return is_valid, explanation
    except Exception:
        logging.exception("[validate_styled_answer] Exception during validation")
        return False, "Exception during style validation"


# --- Updated ask_interview_question Function ---
def ask_interview_question(respondent_agents_dict, last_active_agent, question, processor_llm):
    """
    Full pipeline for one moderator turn: parse the target respondent(s),
    validate topic scope, generate + validate a generic answer per respondent,
    then restyle it (up to 3 attempts) to each respondent's profile.

    Args:
        respondent_agents_dict: name -> wrapper exposing get_agent() and
            get_user_profile().
        last_active_agent: list of names from the previous turn; used to route
            "General" questions. NOTE(review): reassigned locally but never
            returned, so callers do not see the update — confirm intent.
        question: Raw moderator question.
        processor_llm: LLM client for parsing/validation steps.

    Returns:
        A list of markdown-formatted response strings (a single joined string
        when every respondent received the same question).
    """
    logging.info("[ask_interview_question] Entry")
    logging.debug(f"[ask_interview_question] Parameters: question={question}, last_active_agent={last_active_agent}")
    overall_start = time.time()

    try:
        agent_names = list(respondent_agents_dict.keys())
        logging.info(f"[ask_interview_question] Available respondents: {agent_names}")

        parsed_questions = parse_question_with_llm(question, str(agent_names), processor_llm)
        if not parsed_questions:
            return ["**PreData Moderator**: No valid respondents were detected for this question."]

        validated_questions = validate_question_topics(parsed_questions, processor_llm)

        # Reject the whole turn if any extracted question was ruled out of
        # scope (validate_question_topics normalises the sentinel).
        for resp_name, extracted_question in validated_questions.items():
            if extracted_question == "INVALID":
                return ["**PreData Moderator**: The question is invalid. Please ask another question."]

        if len(validated_questions) > 1:
            return ["**PreData Moderator**: Please ask each respondent one question at a time."]

        # Route special respondent names: "General" goes to the previously
        # active agents (if still valid), otherwise to everyone; "All" always
        # goes to everyone.
        if "General" in validated_questions:
            if isinstance(last_active_agent, list) and all(name in agent_names for name in last_active_agent):
                validated_questions = {name: validated_questions["General"] for name in last_active_agent}
            else:
                validated_questions = {name: validated_questions["General"] for name in agent_names}
        elif "All" in validated_questions:
            validated_questions = {name: validated_questions["All"] for name in agent_names}

        last_active_agent = list(validated_questions.keys())

        responses = []
        for agent_name, agent_question in validated_questions.items():
            if agent_name not in respondent_agents_dict:
                responses.append(f"**PreData Moderator**: {agent_name} is not a valid respondent.")
                continue

            respondent_agent = respondent_agents_dict[agent_name].get_agent()
            user_profile = respondent_agents_dict[agent_name].get_user_profile()

            generic_answer = generate_generic_answer(agent_name, agent_question, respondent_agent, respondent_type)
            is_valid, feedback = validate_generic_answer(agent_name, agent_question, generic_answer, user_profile, processor_llm)
            if not is_valid:
                logging.warning(f"[validate_generic_answer]failed: {feedback}")
                responses.append(f"**PreData Moderator**: Please ask {agent_name} again. Reason: {feedback}")
                continue

            tailored_attempts = 0
            max_tailored_attempts = 3
            tailored_answer = None
            style_feedback = None
            gen_feedback = feedback  # plausibility/relevance/accuracy feedback from generic validation

            while tailored_attempts < max_tailored_attempts:
                # Combine feedback from generic and style validation for LLM
                combined_feedback = None
                if gen_feedback and style_feedback:
                    combined_feedback = f"Generic feedback: {gen_feedback}. Style feedback: {style_feedback}"
                elif gen_feedback:
                    combined_feedback = gen_feedback
                elif style_feedback:
                    combined_feedback = style_feedback

                styled = tailor_answer_to_profile(
                    agent_name, generic_answer, agent_question, user_profile, respondent_agent,
                    feedback=combined_feedback
                )

                # Overly long styled answers are discarded and retried.
                if len(styled) > 2000:
                    logging.warning(f"[ask_interview_question] Styled answer too long (len={len(styled)}), retrying...")
                    tailored_attempts += 1
                    continue

                is_valid, style_feedback = validate_styled_answer(
                    agent_name, agent_question, styled, user_profile, processor_llm, respondent_type
                )
                if is_valid:
                    tailored_answer = styled
                    break
                tailored_attempts += 1

            if tailored_answer:
                responses.append(f"**{agent_name}**: {tailored_answer}")
            else:
                responses.append(f"**PreData Moderator**: Failed to stylise the response for {agent_name} after multiple attempts. Last feedback: {style_feedback}")

        # When every respondent got the same question, merge into one message.
        result = ["\n\n".join(responses)] if len(set(validated_questions.values())) == 1 else responses

    except Exception:
        logging.exception("[ask_interview_question] Exception occurred during processing")
        result = ["**PreData Moderator**: An unexpected error occurred while processing the question."]

    overall_duration = time.time() - overall_start
    logging.info(f"[ask_interview_question] Completed in {overall_duration:.2f} seconds")
    return result