File size: 3,223 Bytes
0081066
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a0a961a
 
0081066
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import streamlit as st
import pandas as pd
import os
from utils import save_json, load_json
from markdown import markdown
from utils import load_json
from autoviz import AutoViz_Class
import base64
from google.cloud import aiplatform
import base64
import vertexai
from vertexai.preview.generative_models import GenerativeModel, Part

# Set up Google Cloud access.
# Export the credentials file location before initialising the SDK so that any
# credential lookup performed during or after init can see it.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "credentials.json"

# Project and region used for the Vertex AI calls made later in this script.
aiplatform.init(
    project="ultra-heading-407815",
    location="us-central1",
)


# Defaults for everything the "Get Insights" handler reads, so these names
# exist even when no file has been uploaded yet.
dataframe = None
text_input = ""
option = None

st.title("GemInsights")
file = st.file_uploader(
    "Pick a dataframe", type=["csv", "xlsx"], accept_multiple_files=False
)

if file is not None:
    # Compare the extension case-insensitively so e.g. "DATA.CSV" is still
    # parsed as CSV instead of falling through to the Excel reader.
    _, extension = os.path.splitext(file.name)
    if extension.lower() == ".csv":
        dataframe = pd.read_csv(file)
    else:
        dataframe = pd.read_excel(file)
    st.write(dataframe.head())
    st.write(f"uploaded a dataframe with shape {dataframe.shape}")

    # Free-text description of the dataset; it is appended to the model prompt.
    text_input = st.text_input(
        "Enter something about the data 👇",
        label_visibility="visible",
        disabled=False,
        placeholder="eg:- This is a sales dataframe",
    )

    # Target column; stays None until the user picks one (index=None).
    option = st.selectbox(
        "Which is the target column?",
        tuple(list(dataframe.columns)),
        index=None,
        placeholder="Select one column in here",
    )

def plot(dataframe, target):
    """Ask AutoViz to chart ``dataframe`` and save the charts as JPG files.

    Args:
        dataframe: Data handed to AutoViz through its ``dfte`` argument.
        target: Column name handed to AutoViz as ``depVar``.

    Returns:
        None. The AutoViz return value is discarded; the charts are requested
        to be saved under the ``plots`` directory.
    """
    autoviz_options = dict(
        sep=",",
        depVar=target,
        dfte=dataframe,
        header=0,
        verbose=2,
        lowess=False,
        chart_format="jpg",
        max_rows_analyzed=500,
        max_cols_analyzed=20,
        save_plot_dir="plots",
    )
    # The first positional argument is left empty; the data is supplied via
    # ``dfte`` instead.
    AutoViz_Class().AutoViz("", **autoviz_options)

def prompt_make(dataframe, target, info):
    """Build the text prompt and the image parts that are sent to the model.

    Args:
        dataframe: Unused; kept so existing callers keep working.
        target: Name of the sub-directory of ``plots`` whose files are attached.
        info: Free-text description of the dataset, appended to the prompt.

    Returns:
        A ``(prompt, images)`` tuple. ``prompt`` is the text of ``prompt.txt``
        followed by the dataset description; ``images`` is a list holding one
        ``Part`` per file found in ``plots/<target>``.

    Raises:
        FileNotFoundError: If ``plots/<target>`` or ``prompt.txt`` is missing.
    """
    image_dir = f"plots/{target}"
    images = []
    # Sorted so the attachment order is stable; os.listdir order is arbitrary.
    for image_file in sorted(os.listdir(image_dir)):
        image_path = os.path.join(image_dir, image_file)
        if not os.path.isfile(image_path):
            # Skip sub-directories; open() would fail on them.
            continue
        # The context manager closes the handle promptly. The raw bytes are
        # passed straight through: the former base64 encode/decode round trip
        # returned the same bytes.
        with open(image_path, "rb") as image_handle:
            image_bytes = image_handle.read()
        images.append(Part.from_data(image_bytes, mime_type="image/jpeg"))

    # Read the template as text. Interpolating a bytes object into the f-string
    # would embed its b'...' repr (with escaped newlines) in the prompt.
    with open("prompt.txt", "r", encoding="utf-8") as prompt_file:
        data = prompt_file.read()
    prompt = f"{data}\n Here are some of the informations related to the dataset - '{info}'"

    return prompt, images

def generate_res(prompt, images):
    """Send the prompt and images to the ``gemini-pro-vision`` model.

    Args:
        prompt: Text placed first in the request contents.
        images: List of image parts appended after the prompt.

    Returns:
        The ``text`` attribute of the model's response.
    """
    print("prompting ...")
    vision_model = GenerativeModel("gemini-pro-vision")
    # Request contents: the text prompt followed by every image part.
    contents = [prompt] + images
    sampling_settings = {
        "max_output_tokens": 2048,
        "temperature": 0.4,
        "top_p": 1,
        "top_k": 32,
    }
    reply = vision_model.generate_content(
        contents,
        generation_config=sampling_settings,
    )
    return reply.text



def generate(dataframe, text_input, option):
    """Run the whole pipeline: plot, build the prompt, query the model.

    Args:
        dataframe: Data to visualise.
        text_input: User-supplied description of the data.
        option: Target column name.

    Returns:
        Whatever ``generate_res`` returns for the built prompt and images.
    """
    # The plots must be created first; prompt_make reads them back from disk.
    plot(dataframe, option)
    prompt_text, image_parts = prompt_make(dataframe, option, text_input)
    return generate_res(prompt_text, image_parts)

if st.button("Get Insights", type="primary"):
    if dataframe is None:
        # Nothing uploaded yet: the inputs below do not exist, so running the
        # pipeline would fail. Tell the user what is missing instead.
        st.warning("Please upload a CSV or Excel file first.")
    elif option is None:
        # The target column names the plot directory that is read back, so the
        # pipeline cannot run without one.
        st.warning("Please select the target column first.")
    else:
        st.write("Visualising the data into various plots 📊...")
        st.write("Generating insights from the visualization of the data 💡...")

        # Run the pipeline: plot -> build prompt -> query the model.
        response = generate(dataframe, text_input, option)

        # Convert the model's Markdown reply to HTML and render it.
        # NOTE(review): this renders model output as raw HTML
        # (unsafe_allow_html=True); consider passing the Markdown text to
        # st.markdown directly instead.
        res = markdown(response)
        st.markdown(res, unsafe_allow_html=True)

else:
    st.write("")