import os
import json
import gradio as gr
import pandas as pd
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI

def _strip_code_fence(text):
    """Return *text* with a surrounding Markdown code fence removed, if present."""
    stripped = text.strip()
    fenced = stripped.startswith("```") and stripped.endswith("```")
    if len(stripped) < 6 or not fenced:
        return stripped
    inner = stripped[3:-3]
    first_line, newline, rest = inner.partition("\n")
    # The opening fence may carry a language tag (e.g. ```json) on its own
    # line; drop that line unless the JSON payload itself starts on it.
    if newline and not first_line.lstrip().startswith(("{", "[")):
        inner = rest
    return inner.strip()


def read_csv(csv_file):
    """Ask the chat model to classify the columns of a CSV file.

    The file is loaded with pandas, converted to a dict by
    ``convert_df_to_json``, embedded in a prompt by ``generate_prompt`` and
    sent to ``chat_model``. The prompt and the formatted response are also
    printed to stdout.

    Args:
        csv_file: Path to the CSV file. The file name without its extension
            is used as the table name.

    Returns:
        The model's JSON reply re-serialised with 4-space indentation.

    Raises:
        json.JSONDecodeError: If the reply is not valid JSON even after a
            surrounding Markdown code fence has been removed.
    """
    df = pd.read_csv(csv_file)
    table_name = os.path.splitext(os.path.basename(csv_file))[0]
    prompt = generate_prompt(convert_df_to_json(df, table_name))
    print("Prompt: ")
    print(prompt)
    response = chat_model.invoke(prompt)
    # Chat models often wrap JSON answers in ```json ... ``` fences, which
    # json.loads rejects; unwrap before parsing.
    json_data = json.loads(_strip_code_fence(response.content))
    pretty_json_string = json.dumps(json_data, indent=4)
    print("Response: ")
    print(pretty_json_string)
    return pretty_json_string

def generate_prompt(json_output, column_types=None):
    """Build the prompt asking the model to classify each column's data type.

    Args:
        json_output: JSON-serialisable description of the table; it is
            embedded in the prompt via ``json.dumps``.
        column_types: Optional iterable of type names to offer the model.
            When omitted, the module-level ``types_str`` is used.

    Returns:
        The prompt text: an introduction, the serialised table, the list of
        allowed types and an example of the expected JSON output format.
    """
    if column_types is None:
        allowed_types = types_str
    else:
        allowed_types = ", ".join(map(str, column_types))

    preamble = """
    Given below is the data found in a relational database table in JSON format:

    """

    mid = """

    For each column, tell me whether the data is of one of the following types: """

    # The example uses straight double quotes so that it is itself valid
    # JSON syntax; typographic quotes here could be echoed by the model and
    # make its reply unparseable.
    end = """

    Your output should be in the following format:

    { "tableName": "table_name",
        "columns": [
            {"columnName": "column1", "columnType": "one of the types given above"},
            {"columnName": "column2", "columnType": "one of the types given above"},
   	        …
        ]
    }

    """

    prompt = preamble + json.dumps(json_output) + mid + allowed_types + end
    return prompt

def convert_df_to_json(df, table_name):
    """Describe a DataFrame as a plain dict suitable for ``json.dumps``.

    Args:
        df: DataFrame whose columns are to be described.
        table_name: Value stored under the ``"tableName"`` key.

    Returns:
        A dict with ``"tableName"`` and ``"columns"``; each entry of
        ``"columns"`` holds the column's name and the full list of its
        values (``Series.tolist()``), in the DataFrame's column order.
    """
    column_entries = [
        {"columnName": name, "exampleValues": df[name].tolist()}
        for name in df.columns
    ]
    return {"tableName": table_name, "columns": column_entries}

# Type labels offered to the model; generate_prompt embeds them in the prompt
# through the comma-separated ``types_str``.
known_types = [
    "NAME", "ADDRESS", "EMAIL ADDRESS", "TELEPHONE NUMBER",
    "SOCIAL SECURITY NUMBER", "CREDIT CARD NUMBER", "UNKNOWN",
]
types_str = ", ".join(known_types)

# NOTE(review): `llm` is not referenced in the visible part of this file;
# confirm whether anything else depends on it before removing it.
llm = OpenAI()
# Chat model that read_csv sends the classification prompt to.
# NOTE(review): temperature=0 is presumably chosen to keep replies as
# repeatable as possible — confirm.
chat_model = ChatOpenAI(temperature=0, model_name='gpt-3.5-turbo')

# Gradio UI: one text box for the input, one for the result, and a button
# that runs read_csv.
with gr.Blocks() as demo:
    # The text entered here is passed to read_csv as `csv_file`, which hands
    # it to pd.read_csv and os.path.basename — i.e. it is treated as a path
    # opened by the process running this app.
    file_name = gr.Textbox(label="CSV File")
    # Receives the string returned by read_csv.
    output = gr.Textbox(label="Result")
    analyze_btn = gr.Button("Analyze")
    # NOTE(review): api_name presumably also exposes this handler as a named
    # API endpoint — confirm that is intended.
    analyze_btn.click(fn=read_csv, inputs=file_name, outputs=output, api_name="read_csv")

if __name__ == "__main__":
    # Alternative launch call kept for reference:
    #demo.launch(show_api=False, debug=True)
    # NOTE(review): share=True presumably publishes a public Gradio link;
    # since the UI accepts a file path that this process opens, confirm that
    # public exposure is intended.
    demo.launch(debug=True, share=True)