"""Gradio demo that asks an OpenAI chat model to classify CSV columns.

The user supplies the path of a CSV file.  Every column, together with its
values, is sent to the chat model, which is asked to label each column with
one of the entries in ``known_types``.  The model's JSON answer is shown
pretty-printed in the UI.
"""

import json
import math
import os

import gradio as gr
import pandas as pd
from langchain.chat_models import ChatOpenAI
from langchain.llms import OpenAI

# Closed set of labels the model may assign to a column.
known_types = [
    "NAME",
    "ADDRESS",
    "EMAIL ADDRESS",
    "TELEPHONE NUMBER",
    "SOCIAL SECURITY NUMBER",
    "CREDIT CARD NUMBER",
    "UNKNOWN",
]
# Comma-separated form of ``known_types`` that is spliced into the prompt.
types_str = ', '.join(map(str, known_types))

# NOTE(review): ``llm`` is not referenced anywhere in this file; it is kept
# because other modules may import it -- confirm before removing.
llm = OpenAI()
chat_model = ChatOpenAI(temperature=0, model_name='gpt-3.5-turbo')


def _strip_code_fence(text: str) -> str:
    """Return *text* without a surrounding Markdown code fence, if present."""
    stripped = text.strip()
    if not stripped.startswith("```"):
        return stripped

    body = stripped[3:]
    head, newline, rest = body.partition("\n")
    # The opening fence line is either empty or a language tag such as
    # "json"; only drop it when it cannot be part of the payload itself.
    if newline and (not head.strip() or head.strip().isalnum()):
        body = rest

    body = body.rstrip()
    if body.endswith("```"):
        body = body[:-3]
    return body.strip()


def _json_safe(value):
    """Map float NaN (pandas' missing-value marker) to ``None``.

    ``json.dumps`` would otherwise emit the bare token ``NaN``, which is not
    valid JSON.  Every other value is returned unchanged.
    """
    if isinstance(value, float) and math.isnan(value):
        return None
    return value


def read_csv(csv_file: str) -> str:
    """Classify the columns of a CSV file with the chat model.

    Args:
        csv_file: Path of the CSV file to analyse.  The file name without
            its extension is used as the table name in the prompt.

    Returns:
        The model's answer re-serialised as JSON with 4-space indentation.
        If the answer cannot be parsed as JSON, the raw answer text is
        returned instead so the user can still see what the model said.
    """
    df = pd.read_csv(csv_file)
    table_name = os.path.splitext(os.path.basename(csv_file))[0]
    # NOTE(review): every value of every column is placed in the prompt;
    # large files may exceed the model's context window.
    json_output = convert_df_to_json(df, table_name)

    prompt = generate_prompt(json_output)
    print("Prompt: ")
    print(prompt)

    response = chat_model.invoke(prompt)
    json_content = response.content

    try:
        # Chat models sometimes wrap their JSON in a ``` fence.
        json_data = json.loads(_strip_code_fence(json_content))
    except json.JSONDecodeError as err:
        print("Response was not valid JSON ({}); returning it verbatim.".format(err))
        print(json_content)
        return json_content

    pretty_json_string = json.dumps(json_data, indent=4)
    print("Response: ")
    print(pretty_json_string)
    return pretty_json_string


def generate_prompt(json_output: dict) -> str:
    """Build the classification prompt for one table.

    Args:
        json_output: Table description as produced by
            ``convert_df_to_json``; it is embedded in the prompt via
            ``json.dumps``.

    Returns:
        The full prompt: an introduction, the table data, the list of
        allowed types (``types_str``) and the required output format.
    """
    preamble = """
Given below is the data found in a relational database table in JSON format:
"""
    mid = """
For each column, tell me whether the data is of one of the following types:
"""
    # The example uses straight quotes so that the template the model is
    # asked to imitate is itself valid JSON.
    end = """
Your output should be in the following format:
{
    "tableName": "table_name",
    "columns": [
        {"columnName": "column1", "columnType": "one of the types given above"},
        {"columnName": "column2", "columnType": "one of the types given above"},
        …
    ]
}
"""
    prompt = preamble + json.dumps(json_output) + mid + types_str + end
    return prompt


def convert_df_to_json(df: pd.DataFrame, table_name: str) -> dict:
    """Describe a DataFrame as a JSON-serialisable table dictionary.

    Args:
        df: The table data.
        table_name: Name stored under the ``"tableName"`` key.

    Returns:
        ``{"tableName": table_name, "columns": [...]}`` where each column
        entry holds ``"columnName"`` and ``"exampleValues"`` (all values of
        that column, in order, with missing values represented as ``None``).
    """
    columns = [
        {
            "columnName": column,
            "exampleValues": [_json_safe(value) for value in df[column].tolist()],
        }
        for column in df.columns
    ]
    return {"tableName": table_name, "columns": columns}


with gr.Blocks() as demo:
    file_name = gr.Textbox(label="CSV File")
    output = gr.Textbox(label="Result")
    analyze_btn = gr.Button("Analyze")
    analyze_btn.click(fn=read_csv, inputs=file_name, outputs=output, api_name="read_csv")

if __name__ == "__main__":
    # Alternative launch without the API page:
    # demo.launch(show_api=False, debug=True)
    demo.launch(debug=True, share=True)