import requests
from jsondiff import diff
import yaml
import pandas as pd
import os
import shutil
import json
from enum import Enum
from datetime import datetime

# The purpose of this script is to automate running a bunch of tests.
# The input folder should contain:
#   1. A file containing a list of the recipe parameters
#   2. A file containing the input data for each of the schemas
#
# Outline of a run:
#   - get the gold standard JSONs (and raw input data) from Baserow
#   - get the recipe parameter list from Baserow
#   - for each recipe x input dataset:
#       * pick the input text the recipe's pre-processing strategy calls for
#       * generate the three JSONs via process_specifications
#       * diff them against the gold standards
#       * write markdown / JSON / YAML comparison artifacts
#   - zip everything up into a single downloadable archive

BASEROW_API_KEY = os.getenv("BASEROW_API_KEY")

from process_data import process_specifications


def get_baserow_url(table_id):
    """Build the Baserow "list rows" endpoint URL for *table_id*."""
    print("GETTING BASEROW URL")
    BASEROW_API_BASE = "https://baserow.f11804a1.federatedcomputer.net/api"
    return f"{BASEROW_API_BASE}/database/rows/table/{table_id}/?user_field_names=true"


def get_baserow_data():
    """Fetch the gold-standard JSONs and interview input data from Baserow.

    Returns:
        (gold_standards, input_data): two dicts keyed by dataset name
        ("liz_carrot", "ben_soybean", "wally_squash"). gold_standards values
        hold "planting" / "interactions" / "trials" JSON strings; input_data
        values hold "raw_interview" / "otter_summary" / "greg_summary" text.
        Returns None when the HTTP request fails (callers that unpack the
        tuple will then raise TypeError — TODO: consider re-raising instead).
    """
    print("GETTING BASEROW DATA")
    TABLE_ID = "560"
    BASEROW_URL = get_baserow_url(TABLE_ID)
    headers = {
        "Authorization": f"Token {os.environ['BASEROW_API_KEY']}",
        "Content-Type": "application/json",
    }
    # Fixed Baserow row ids -> dataset keys. Using a map (instead of one
    # local variable per field) means a missing row can no longer cause a
    # NameError when the result dicts are assembled below.
    row_id_to_dataset = {2: "liz_carrot", 3: "ben_soybean", 5: "wally_squash"}
    print("STARTING TO TRY RESPONSE REQUEST")
    try:
        response = requests.get(BASEROW_URL, headers=headers)
        print("GOT")
        response.raise_for_status()
        rows = response.json()
        results = rows.get("results", [])
        gold_standards = {}
        input_data = {}
        print("PARSING ROWS NOW")
        for row in results:
            print(f"Row ID: {row.get('id')}, Data: {row}")
            dataset = row_id_to_dataset.get(row.get("id"))
            if dataset is None:
                continue  # rows other than 2/3/5 are not part of the test set
            gold_standards[dataset] = {
                "planting": row.get("Plantings and Fields - Gold Standard"),
                "interactions": row.get("Interactions - Gold Standard"),
                "trials": row.get("Trials - Gold Standard"),
            }
            input_data[dataset] = {
                "raw_interview": row.get("Raw Interview"),
                "otter_summary": row.get("Otter Summary"),
                "greg_summary": row.get("Post-Interview Summary"),
            }
        # How to retrieve this data:
        #   liz_carrot_planting = gold_standards["liz_carrot"]["planting"]
        print("BASEROW DATA DONE GOT")
        print("GOLD STANDARDS HERE")
        print(gold_standards)
        print("INPUT DATA HERE")
        print(input_data)
        return gold_standards, input_data
    except requests.exceptions.RequestException as e:
        print(f"Failed to fetch rows: {e}")


def get_recipes():
    """Fetch the list of recipe definitions from Baserow.

    Returns:
        list[dict]: one dict per recipe row (see recipe_dict below), or
        None when the HTTP request fails.
    """
    print("GETTING RECIPES FROM BASEROW NOW")
    #TABLE_ID = "588"
    #TABLE_ID = "578"
    #TABLE_ID = "580" This table contains only one row for testing purposes
    TABLE_ID = "589"
    BASEROW_URL = get_baserow_url(TABLE_ID)
    headers = {
        "Authorization": f"Token {os.environ['BASEROW_API_KEY']}",
        "Content-Type": "application/json",
    }
    print("TRYING TO GET A RESPONSE")
    try:
        response = requests.get(BASEROW_URL, headers=headers)
        response.raise_for_status()
        rows = response.json()
        results = rows.get("results", [])
        my_recipes = []
        print("PARSING ROWS")
        for row in results:
            print(f"Row ID: {row.get('id')}, Data: {row}")
            # Single-select fields come back as {"value": ...}; multi-select
            # fields as a list of such dicts. The `or`-fallbacks guard
            # against present-but-null fields (which `.get(key, default)`
            # does NOT catch) and against empty multi-select lists.
            schema_processing_model = (row.get("Schema Processing Model") or {}).get("value", None)
            pre_processing_strategy = ((row.get("Pre-Processing Strategy") or [{}])[0]).get("value", None)
            pre_processing_model = (row.get("Preprocessing Model") or {}).get("value", None)
            prompting_strategy = ((row.get("Prompting Strategy") or [{}])[0]).get("value", None)
            recipe_dict = {
                "recipe_id": row.get("Recipe ID"),
                "testing_strategy_text": row.get("Testing Strategy for Set"),
                "schema_processing_model": schema_processing_model,
                "pre_processing_strategy": pre_processing_strategy,
                "pre_processing_text": row.get("Pre-Prompt Text"),
                "pre_processing_model": pre_processing_model,
                "prompting_strategy": prompting_strategy,
                "plantings_and_fields_prompt": row.get("Plantings and Fields Prompting Text"),
                "interactions_prompt": row.get("Interactions Prompting Text"),
                "treatments_prompt": row.get("Treatments Prompting Text"),
            }
            my_recipes.append(recipe_dict)
        print("FINISHED GETTING THE RECIPE DATA")
        print("RECIPES HERE")
        print(my_recipes)
        return my_recipes
    except requests.exceptions.RequestException as e:
        print(f"Failed to fetch rows: {e}")


def fill_out_survey(recipe_dict, input_data):
    """Submit a pre-filled submission for this recipe to SurveyStack.

    NOTE(review): currently unused — the call site in drive_process is
    commented out because the payload "isn't accepted by the data" yet.

    Returns:
        The submission payload on HTTP 200, otherwise None.
    """
    print("filling out survey")
    survey_id = "673b4994aef86f0533b3546c"
    base_url = "https://app.surveystack.io/api/submissions"

    # A recipe with no pre-prompt text means no pre-processing pass.
    pre_processing = recipe_dict.get("pre_processing_text") is not None

    if pre_processing:
        submission_data = {
            "survey": survey_id,
            "data": {
                "inputstyle": "big-block-input-text",
                "onelonginputtext": input_data,
                "schema_prompt": {
                    "firstschemaprompt": recipe_dict["plantings_and_fields_prompt"],
                    "secondschemaprompt": recipe_dict["interactions_prompt"],
                    "thirdschemaprompt": recipe_dict["treatments_prompt"],
                },
            },
            "parameters": {
                "modelversion": recipe_dict["schema_processing_model"],
                "preprocessdata": ["yes"],
                "promptstyle": recipe_dict["prompting_strategy"],
                # BUG FIX: previously sent recipe_dict["prompting_strategy"]
                # here — the pre-processing model, not the prompting
                # strategy, belongs in this field.
                "preprocessmodelversion": recipe_dict["pre_processing_model"],
                "multiplepreprompts": "no",
                "prepromptstyle": recipe_dict["pre_processing_strategy"],
                "preprocessingprompt1": recipe_dict["pre_processing_text"],
                "preprocessingprompt2": "",
                "preprocessingprompt3": "",
            },
        }
    else:
        submission_data = {
            "survey": survey_id,
            "data": {
                "inputstyle": "big-block-input-text",
                "onelonginputtext": input_data,
                "schema_prompt": {
                    "firstschemaprompt": recipe_dict["plantings_and_fields_prompt"],
                    "secondschemaprompt": recipe_dict["interactions_prompt"],
                    "thirdschemaprompt": recipe_dict["treatments_prompt"],
                },
            },
            "parameters": {
                "modelversion": recipe_dict["schema_processing_model"],
                "preprocessdata": ["no"],
                "promptstyle": recipe_dict["prompting_strategy"],
                "preprocessmodelversion": None,
                "multiplepreprompts": "no",
                "prepromptstyle": None,
                "preprocessingprompt1": None,
                "preprocessingprompt2": None,
                "preprocessingprompt3": None,
            },
        }

    headers = {
        "Content-Type": "application/json",
    }
    print("GETTING SURVEY RESPONSE")
    try:
        response = requests.post(base_url, headers=headers, data=json.dumps(submission_data))
        response.raise_for_status()
        if response.status_code == 200:
            print("Submission successful to SurveyStack!")
            print(response.json())
            return submission_data
        else:
            # raise_for_status() already filtered 4xx/5xx, so this branch
            # only sees non-200 success codes (e.g. 201).
            print(f"Failed to submit: {response.status_code} - {response.text}")
    except requests.exceptions.RequestException as e:
        print(f"An error occurred while submitting the data: {e}")


def get_data_ready(recipe_dict, input_data_piece):
    """Assemble the spec dict that process_specifications expects.

    Args:
        recipe_dict: one recipe from get_recipes().
        input_data_piece: the input text chosen for this recipe
            (raw interview, Otter summary, or Greg summary).

    Returns:
        dict with "inputstyle", "input_text", "prompts" and "parameters".
    """
    print("GETTING DATA READY")
    # Pre-processing is skipped only when BOTH the strategy and the model
    # explicitly say so.
    no_preprocess = (
        recipe_dict["pre_processing_strategy"] == "None"
        and recipe_dict["pre_processing_model"] == "No preprocessing"
    )
    processed_data = {
        "inputstyle": "big-block-input-text",
        "input_text": input_data_piece,
        "prompts": {
            "firstschemaprompt": recipe_dict["plantings_and_fields_prompt"],
            "secondschemaprompt": recipe_dict["interactions_prompt"],
            "thirdschemaprompt": recipe_dict["treatments_prompt"],
        },
        "parameters": {
            "modelversion": recipe_dict["schema_processing_model"],
            "promptstyle": recipe_dict["prompting_strategy"],
            "preprocessdata": "no" if no_preprocess else "yes",
            "preprocessmodelversion": recipe_dict["pre_processing_model"],
            "multiplepreprompts": "no",
            "prepromptstyle": recipe_dict["pre_processing_strategy"],
            "preprocessingprompt1": recipe_dict["pre_processing_text"],
            "preprocessingprompt2": "",
            "preprocessingprompt3": "",
        },
    }
    print("DID THAT NOW")
    return processed_data


def format_json(json_data, truncate_length=500):
    """Pretty-print a JSON string, truncated to *truncate_length* chars.

    Falls back to truncating the raw string when it isn't valid JSON.
    """
    try:
        parsed_data = json.loads(json_data)
        formatted_json = json.dumps(parsed_data, indent=2)
        if len(formatted_json) > truncate_length:
            return formatted_json[:truncate_length] + "..."
        return formatted_json
    except json.JSONDecodeError:
        if len(json_data) > truncate_length:
            return json_data[:truncate_length] + "..."
        return json_data


def custom_serializer(obj):
    """`json.dumps(default=...)` hook for the project's custom objects.

    Enum members serialize to their name; objects exposing a to_dict()
    method (e.g. the project's Soil / Yield classes — previously referenced
    here by name without being imported, which raised NameError) use it;
    anything else falls back to its __dict__.
    """
    if isinstance(obj, Enum):
        return obj.name  # Or obj.value, depending on what you need
    if hasattr(obj, "to_dict"):
        return obj.to_dict()
    return obj.__dict__


def sanitize_json_for_yaml(data):
    """Recursively convert *data* into plain YAML-safe containers.

    Tuples become lists; dicts and lists are walked; scalars pass through.
    """
    if isinstance(data, dict):
        return {key: sanitize_json_for_yaml(value) for key, value in data.items()}
    elif isinstance(data, list):
        return [sanitize_json_for_yaml(item) for item in data]
    elif isinstance(data, tuple):
        return list(data)
    else:
        return data


def generate_markdown_output(df):
    """Render one markdown comparison report from a DataFrame of result rows.

    Sections: input transcript, recipe fields, diffs, prompts, and
    side-by-side JSON and YAML comparisons of gold vs machine output.
    """
    markdown = ""

    # 1. Input Transcript
    markdown += "\n## Input Transcript\n"
    for _, row in df.iterrows():
        # str() guards against non-string transcripts (the column has
        # historically held a dict of input variants, which is not sliceable).
        transcript = str(row['Input_Transcript'])
        truncated_input = transcript[:500] + "..." if len(transcript) > 500 else transcript
        markdown += f"**Recipe ID {row['Recipe_ID']}**:\n```\n{truncated_input}\n```\n\n"

    # 2. Recipe Fields
    markdown += "\n## Recipe Fields\n"
    recipe_columns = [
        "Recipe ID", "Testing Strategy", "Schema Processing Model",
        "Pre-Processing Strategy", "Pre-Processing Text",
        "Pre-Processing Model", "Prompting Strategy"
    ]
    recipe_table = "| " + " | ".join(recipe_columns) + " |\n"
    recipe_table += "| " + " | ".join(["-" * len(col) for col in recipe_columns]) + " |\n"
    for _, row in df.iterrows():
        recipe_table += f"| {row['Recipe_ID']} | {row['Testing_Strategy_Text']} | {row['Schema_Processing_Model']} | {row['Pre_Processing_Strategy']} | {row['Pre_Processing_Text']} | {row['Pre_Processing_Model']} | {row['Prompting_Strategy']} |\n"
    markdown += recipe_table + "\n"

    # 3. Differences
    markdown += "\n## Differences\n"
    for _, row in df.iterrows():
        markdown += f"\n### Recipe ID: {row['Recipe_ID']}\n"
        differences = row['Differences']
        # One sub-section per schema key (planting / interactions / trials).
        for key, value in differences.items():
            markdown += f"#### {key.capitalize()}\n"
            for item in value:
                markdown += f" - {item}\n"

    # 4. Prompts
    markdown += "\n## Prompts\n"
    prompt_columns = ["Plantings and Fields Prompt", "Interactions Prompt", "Treatments Prompt"]
    prompt_table = "| " + " | ".join(prompt_columns) + " |\n"
    prompt_table += "| " + " | ".join(["-" * len(col) for col in prompt_columns]) + " |\n"
    for _, row in df.iterrows():
        prompt_table += f"| {row['Plantings_and_Fields_Prompt']} | {row['Interactions_Prompt']} | {row['Treatments_Prompt']} |\n"
    markdown += prompt_table + "\n"

    # 5. Side-by-Side JSON Comparisons
    markdown += "\n## Gold Standard vs Machine Generated JSON\n"
    for _, row in df.iterrows():
        markdown += f"\n### Recipe ID: {row['Recipe_ID']}\n"
        for key in ["planting", "interactions", "trials"]:
            gold = json.dumps(row['Gold_Standard_JSON'].get(key, {}), indent=2)
            machine = json.dumps(row['Machine_Generated_JSON'].get(key, {}), default=custom_serializer, indent=2)
            markdown += f"#### {key.capitalize()}\n"
            markdown += f"**Gold Standard JSON**:\n```json\n{gold}\n```\n"
            markdown += f"**Machine Generated JSON**:\n```json\n{machine}\n```\n"

    # 6. Side-by-Side YAML Comparisons
    markdown += "\n## Gold Standard vs Machine Generated YAML\n"
    for _, row in df.iterrows():
        markdown += f"\n### Recipe ID: {row['Recipe_ID']}\n"
        for key in ["planting", "interactions", "trials"]:
            gold = yaml.dump(row['Gold_Standard_JSON'].get(key, {}), default_flow_style=False, sort_keys=True)
            machine = yaml.dump(row['Machine_Generated_JSON'].get(key, {}), default_flow_style=False, sort_keys=True)
            markdown += f"#### {key.capitalize()}\n"
            markdown += f"**Gold Standard YAML**:\n```yaml\n{gold}\n```\n"
            markdown += f"**Machine Generated YAML**:\n```yaml\n{machine}\n```\n"

    return markdown


def drive_process():
    """Run every recipe against every input dataset and write reports.

    For each (recipe, dataset) pair: pick the input text dictated by the
    recipe's pre-processing strategy, run process_specifications, diff the
    three generated JSONs against the gold standards, and save markdown +
    JSON artifacts into a per-recipe folder. Finally zip the whole output
    folder and return the archive path.

    Returns:
        str: path to the created .zip archive.
    """
    print("We are starting to DRIVE PROCESS")
    # Gold standards JSON and input data, both keyed by dataset name.
    gold_standards, input_data = get_baserow_data()
    my_recipes = get_recipes()

    print("Making the OUTPUT STUFF")
    output_folder = "output_results_" + datetime.now().strftime("%Y%m%d_%H%M%S")
    os.makedirs(output_folder, exist_ok=True)

    print("GOING THROUGH RECIPES NOW")
    for recipe_dict in my_recipes:
        for key, input_chunks in input_data.items():
            output_rows = []
            print("RECIPE INFO")
            print(key)
            print(recipe_dict["recipe_id"])

            # Choose the input text according to the pre-processing strategy.
            if recipe_dict["pre_processing_strategy"] == "Otter.ai Summary":
                input_data_piece = input_chunks["otter_summary"]
            elif recipe_dict["pre_processing_strategy"] == "Greg Summary":
                input_data_piece = input_chunks["greg_summary"]
            else:
                input_data_piece = input_chunks["raw_interview"]
            print("DECIDED INPUT DATA")
            print(input_data_piece)

            # Fill out a Surveystack submission
            # This isn't accepted by the data
            #fill_out_survey(recipe_dict, input_data)

            # Prepare the data for the structured output setup
            proc_spec = get_data_ready(recipe_dict, input_data_piece)

            print("Gold Standard")
            # Gold standards for this dataset, stored as JSON strings.
            raw_gold = gold_standards[key]
            gold_standard_planting_json = json.loads(raw_gold["planting"])
            gold_standard_interactions_json = json.loads(raw_gold["interactions"])
            gold_standard_trials_json = json.loads(raw_gold["trials"])
            print("Gold standard json after loading")
            print(gold_standard_planting_json)

            print("PROCESSING SPECIFICATIONS!!!!!!!!!!!!!!!")
            (processed_farm_activity_json,
             processed_interactions_json,
             processed_trials_json) = process_specifications(proc_spec)
            # THIS SHOULD ONLY BE USED FOR TESTING
            #processed_farm_activity_json = gold_standard_planting_json
            #processed_interactions_json = gold_standard_interactions_json
            #processed_trials_json = gold_standard_trials_json
            processed_farm_activity_json = json.loads(processed_farm_activity_json)
            processed_interactions_json = json.loads(processed_interactions_json)
            processed_trials_json = json.loads(processed_trials_json)
            print("Processed and loaded 1st json from machine gen")
            print(processed_farm_activity_json)

            # Compare the generated JSON to the gold standard
            differences_planting = list(diff(gold_standard_planting_json, processed_farm_activity_json))
            differences_interactions = list(diff(gold_standard_interactions_json, processed_interactions_json))
            differences_trials = list(diff(gold_standard_trials_json, processed_trials_json))
            print("Diff planting")
            print(differences_planting)

            # Sanitize everything (tuples -> lists) so yaml.dump is happy.
            completed_gold_standard_planting_json = sanitize_json_for_yaml(gold_standard_planting_json)
            completed_gold_standard_interactions_json = sanitize_json_for_yaml(gold_standard_interactions_json)
            completed_gold_standard_trials_json = sanitize_json_for_yaml(gold_standard_trials_json)
            completed_processed_farm_activity_json = sanitize_json_for_yaml(processed_farm_activity_json)
            completed_processed_interactions_json = sanitize_json_for_yaml(processed_interactions_json)
            completed_processed_trials_json = sanitize_json_for_yaml(processed_trials_json)

            json_diff = {
                "planting": differences_planting,
                "interactions": differences_interactions,
                "trials": differences_trials
            }
            gold_standard_json = {
                "planting": completed_gold_standard_planting_json,
                "interactions": completed_gold_standard_interactions_json,
                "trials": completed_gold_standard_trials_json
            }
            comparison_json = {
                "planting": completed_processed_farm_activity_json,
                "interactions": completed_processed_interactions_json,
                "trials": completed_processed_trials_json
            }

            recipe_id = recipe_dict.get("recipe_id", "N/A")
            output_rows.append({
                "Recipe_ID": recipe_id,
                "Testing_Strategy_Text": recipe_dict.get("testing_strategy_text", "N/A"),
                "Schema_Processing_Model": recipe_dict.get("schema_processing_model", "N/A"),
                "Pre_Processing_Strategy": recipe_dict.get("pre_processing_strategy", "N/A"),
                "Pre_Processing_Text": recipe_dict.get("pre_processing_text", "N/A"),
                "Pre_Processing_Model": recipe_dict.get("pre_processing_model", "N/A"),
                "Prompting_Strategy": recipe_dict.get("prompting_strategy", "N/A"),
                "Plantings_and_Fields_Prompt": recipe_dict.get("plantings_and_fields_prompt", "N/A"),
                "Interactions_Prompt": recipe_dict.get("interactions_prompt", "N/A"),
                "Treatments_Prompt": recipe_dict.get("treatments_prompt", "N/A"),
                # BUG FIX: previously stored the whole input_chunks dict,
                # which generate_markdown_output cannot slice. Store the
                # text actually fed to the pipeline.
                "Input_Transcript": input_data_piece,
                "Gold_Standard_JSON": gold_standard_json,
                "Machine_Generated_JSON": comparison_json,
                "Differences": json_diff
            })

            df = pd.DataFrame(output_rows)
            print("dataframe done now onto markdown")
            markdown_output = generate_markdown_output(df)

            recipe_folder = os.path.join(output_folder, f"recipe_{recipe_dict['recipe_id']}")
            os.makedirs(recipe_folder, exist_ok=True)

            # Save markdown to file
            markdown_file = os.path.join(recipe_folder, f"recipe_{recipe_dict['recipe_id']}_data_{key}_output.md")
            with open(markdown_file, 'w') as f:
                f.write(markdown_output)

            # Save JSON files
            json_file_gold = os.path.join(recipe_folder, f"recipe_{recipe_dict['recipe_id']}_data_{key}_gold_standard.json")
            json_file_generated = os.path.join(recipe_folder, f"recipe_{recipe_dict['recipe_id']}_data_{key}_generated.json")
            with open(json_file_gold, 'w') as f:
                json.dump(gold_standard_json, f, indent=2)
            with open(json_file_generated, 'w') as f:
                json.dump(comparison_json, f, indent=2)

            # Save differences as a separate file
            differences_file = os.path.join(recipe_folder, f"recipe_{recipe_dict['recipe_id']}_data_{key}_differences.json")
            with open(differences_file, 'w') as f:
                # BUG FIX: previously wrote str(differences_file) — the file
                # PATH — into the file. Write the actual diff contents;
                # default=str stringifies jsondiff's symbolic keys.
                json.dump(json_diff, f, indent=2, default=str)

    print("ZIPPING UP WHOLE THING")
    # Zip the entire output folder
    zip_filename = f"{output_folder}.zip"
    shutil.make_archive(output_folder, 'zip', output_folder)
    # Cleanup by removing the unzipped folder after zipping it
    shutil.rmtree(output_folder)
    # Return the zip file for downloading
    # (an unreachable `return output_folder` that followed has been removed)
    return zip_filename