File size: 2,292 Bytes
af06011
 
2cf4acc
5fb3fab
471b449
 
2cf4acc
af06011
 
 
471b449
f6d39aa
 
471b449
 
 
 
f6d39aa
 
471b449
5fb3fab
af06011
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
471b449
 
 
 
 
 
 
 
 
af06011
471b449
 
 
 
 
 
 
 
 
 
2c5a9f9
68053c7
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import os
import json
import gradio as gr
import pandas as pd
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI

def _strip_code_fence(text):
    """Return *text* without a surrounding Markdown code fence, if present."""
    cleaned = text.strip()
    if cleaned.startswith("```"):
        # Drop the whole opening fence line; it may carry a language tag
        # such as ```json.
        cleaned = cleaned.partition("\n")[2]
        if cleaned.rstrip().endswith("```"):
            cleaned = cleaned.rstrip()[:-3]
    return cleaned.strip()


def read_csv(csv_file):
    """Ask the chat model to classify the columns of a CSV file.

    The file is loaded with pandas, converted to the dict produced by
    ``convert_df_to_json`` (the table name is the file name without its
    extension), embedded in the prompt built by ``generate_prompt`` and sent
    to the module-level ``chat_model``. The prompt and the formatted response
    are also printed to stdout.

    Args:
        csv_file: Path of the CSV file to analyse.

    Returns:
        The model's JSON answer re-serialized with 4-space indentation.

    Raises:
        json.JSONDecodeError: If the model's reply is not valid JSON even
            after an enclosing Markdown code fence has been removed.
    """
    df = pd.read_csv(csv_file)
    table_name = os.path.splitext(os.path.basename(csv_file))[0]
    json_output = convert_df_to_json(df, table_name)
    prompt = generate_prompt(json_output)
    print("Prompt: ")
    print(prompt)
    response = chat_model.invoke(prompt)
    # The model may wrap its JSON answer in a ```json ... ``` fence, which
    # json.loads would reject; unwrap it before parsing.
    json_content = _strip_code_fence(response.content)
    json_data = json.loads(json_content)
    pretty_json_string = json.dumps(json_data, indent=4)
    print("Response: ")
    print(pretty_json_string)
    return pretty_json_string

def generate_prompt(json_output):
    """Build the column-classification prompt for the chat model.

    The prompt consists of an introduction, ``json_output`` serialized with
    ``json.dumps``, the list of allowed types taken from the module-level
    ``types_str``, and a template of the JSON structure the reply should use.

    Args:
        json_output: JSON-serializable description of the table, as produced
            by ``convert_df_to_json``.

    Returns:
        The complete prompt as a single string.
    """
    preamble = """
    Given below is the data found in a relational database table in JSON format:

    """

    mid = """

    For each column, tell me whether the data is of one of the following types: """

    # The template uses plain ASCII double quotes throughout: typographic
    # quotes here would show the model an example that is not valid JSON,
    # and the reply is later parsed with json.loads.
    end = """

    Your output should be in the following format:

    { "tableName": "table_name",
        "columns": [
            {"columnName": "column1", "columnType": "one of the types given above"},
            {"columnName": "column2", "columnType": "one of the types given above"},

        ]
    }

    """

    prompt = preamble + json.dumps(json_output) + mid + types_str + end
    return prompt

def convert_df_to_json(df, table_name):
    """Describe a DataFrame as a plain dict ready for JSON serialization.

    Args:
        df: The pandas DataFrame to describe.
        table_name: Value stored under the ``"tableName"`` key.

    Returns:
        A dict with ``"tableName"`` and ``"columns"``; the latter holds one
        entry per DataFrame column, in column order, with the column's name
        (``"columnName"``) and all of its values as a list
        (``"exampleValues"``).
    """
    column_entries = [
        {"columnName": name, "exampleValues": df[name].tolist()}
        for name in df.columns
    ]
    return {"tableName": table_name, "columns": column_entries}

# Column categories offered to the model; generate_prompt() embeds them in the
# prompt through the comma-separated ``types_str``.
known_types = [
    "NAME",
    "ADDRESS",
    "EMAIL ADDRESS",
    "TELEPHONE NUMBER",
    "SOCIAL SECURITY NUMBER",
    "CREDIT CARD NUMBER",
    "UNKNOWN",
]
types_str = ", ".join(str(label) for label in known_types)

# Model clients, constructed once when the module is imported.
# NOTE(review): `llm` is not referenced anywhere in the visible code — confirm
# whether it is still needed before removing it.
llm = OpenAI()
# Chat model invoked by read_csv(); temperature=0 is presumably chosen to keep
# the classification output as repeatable as possible — TODO confirm.
chat_model = ChatOpenAI(temperature=0, model_name='gpt-3.5-turbo')

# Gradio UI: a textbox for the CSV file path, a textbox for the result, and a
# button that runs read_csv() on the entered path.
with gr.Blocks() as demo:
    # Free-text input; its value is handed to read_csv() as `csv_file`.
    file_name = gr.Textbox(label="CSV File")
    output = gr.Textbox(label="Result")
    analyze_btn = gr.Button("Analyze")
    # Clicking the button calls read_csv with the "CSV File" value and puts
    # its return value into the "Result" textbox.
    analyze_btn.click(fn=read_csv, inputs=file_name, outputs=output, api_name="read_csv")

if __name__ == "__main__":
    # Alternative launch configuration, kept for reference:
    #demo.launch(show_api=False, debug=True)
    # NOTE(review): share=True presumably publishes the app through a public
    # Gradio link — confirm this is intended, since read_csv() opens whatever
    # file path is typed into the "CSV File" textbox.
    demo.launch(debug=True, share=True)