# PIIDetectorDemo / app.py
# sanketmalde's picture
# Update app.py
# f6d39aa verified
import os
import json
import gradio as gr
import pandas as pd
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
def _strip_code_fence(text):
    """Return *text* without a surrounding Markdown code fence, if it has one."""
    stripped = text.strip()
    if not stripped.startswith("```"):
        return stripped
    # Drop the opening fence line, which may carry a language tag ("```json").
    first_line, newline, rest = stripped.partition("\n")
    body = rest if newline else first_line[3:]
    body = body.rstrip()
    if body.endswith("```"):
        body = body[:-3]
    return body.strip()


def read_csv(csv_file):
    """Classify the columns of a CSV file by asking the chat model.

    The file is loaded with pandas, converted to the JSON structure built by
    ``convert_df_to_json`` (the table name is the file name without directory
    or extension), wrapped in the prompt from ``generate_prompt`` and sent to
    ``chat_model``. The reply is parsed as JSON and re-serialised with a
    4-space indent.

    Args:
        csv_file: Path of the CSV file to analyse.

    Returns:
        The model's answer as a pretty-printed JSON string.

    Raises:
        json.JSONDecodeError: If the reply is not valid JSON even after a
            surrounding Markdown code fence has been removed.
    """
    df = pd.read_csv(csv_file)
    table_name = os.path.splitext(os.path.basename(csv_file))[0]
    json_output = convert_df_to_json(df, table_name)
    prompt = generate_prompt(json_output)
    # NOTE(review): this echoes the table contents to stdout.
    print("Prompt: ")
    print(prompt)

    response = chat_model.invoke(prompt)
    # The reply may arrive wrapped in ```json ... ``` fences; json.loads would
    # reject that even though the payload itself is fine.
    json_content = _strip_code_fence(response.content)
    json_data = json.loads(json_content)
    pretty_json_string = json.dumps(json_data, indent=4)
    print("Response: ")
    print(pretty_json_string)
    return pretty_json_string
def generate_prompt(json_output):
    """Build the column-classification prompt for the chat model.

    The prompt consists of an introduction, the table serialised with
    ``json.dumps``, the list of allowed types (module-level ``types_str``)
    and an example of the expected answer format.

    Args:
        json_output: JSON-serialisable description of the table.

    Returns:
        The complete prompt as a single string.
    """
    preamble = (
        "\n"
        "Given below is the data found in a relational database table in JSON format:\n"
    )
    mid = (
        "\n"
        "For each column, tell me whether the data is of one of the following types: "
    )
    # The example has to be valid JSON itself (straight quotes, no trailing
    # comma): the reply is parsed with json.loads, and a malformed example
    # invites the model to copy the malformed syntax.
    end = (
        "\n"
        "Your output should be in the following format:\n"
        '{ "tableName": "table_name",\n'
        '"columns": [\n'
        '{"columnName": "column1", "columnType": "one of the types given above"},\n'
        '{"columnName": "column2", "columnType": "one of the types given above"}\n'
        "]\n"
        "}\n"
    )
    return preamble + json.dumps(json_output) + mid + types_str + end
def convert_df_to_json(df, table_name, *, max_examples=None):
    """Describe a DataFrame as a JSON-serialisable dict of columns and values.

    Missing values (NaN/None) are emitted as ``None`` so that serialising the
    result yields JSON ``null`` rather than the non-standard ``NaN`` token.

    Args:
        df: The pandas DataFrame to describe.
        table_name: Value stored under the ``"tableName"`` key.
        max_examples: Optional non-negative cap on how many leading values of
            each column are included. ``None`` (the default) includes every
            row.

    Returns:
        ``{"tableName": table_name, "columns": [...]}`` where each column entry
        is ``{"columnName": <name>, "exampleValues": [<values>]}``, in the
        DataFrame's column order.
    """
    columns = []
    for column in df.columns:
        series = df[column]
        if max_examples is not None:
            series = series.head(max_examples)
        values = [None if pd.isna(value) else value for value in series.tolist()]
        columns.append({"columnName": column, "exampleValues": values})
    return {"tableName": table_name, "columns": columns}
# Categories the model may assign to a column; "UNKNOWN" is the fallback label.
known_types = [
    "NAME", "ADDRESS", "EMAIL ADDRESS", "TELEPHONE NUMBER",
    "SOCIAL SECURITY NUMBER", "CREDIT CARD NUMBER", "UNKNOWN",
]
# Comma-separated form of the list, spliced into the prompt text.
types_str = ", ".join(known_types)
# Completion-style client. NOTE(review): not referenced anywhere else in the
# visible source — confirm whether it is still needed.
llm = OpenAI()
# Chat client that read_csv sends the prompt to; temperature 0 is set here.
# NOTE(review): presumably picks up API credentials from the environment — confirm.
chat_model = ChatOpenAI(temperature=0, model_name='gpt-3.5-turbo')
# UI definition: one text input, one text output and a button wired to read_csv.
with gr.Blocks() as demo:
    # The text typed here is passed to read_csv as its csv_file argument.
    # NOTE(review): read_csv hands it straight to pd.read_csv, so this is an
    # arbitrary path supplied by the user — confirm that is acceptable.
    file_name = gr.Textbox(label="CSV File")
    # Receives the string returned by read_csv.
    output = gr.Textbox(label="Result")
    analyze_btn = gr.Button("Analyze")
    # Clicking the button runs read_csv(file_name) and writes the result to `output`.
    analyze_btn.click(fn=read_csv, inputs=file_name, outputs=output, api_name="read_csv")
# Start the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    # Earlier launch configuration, kept for reference:
    #demo.launch(show_api=False, debug=True)
    # NOTE(review): share=True presumably requests a publicly reachable link —
    # confirm that is intended given the free-text file path input.
    demo.launch(debug=True, share=True)