# File size: 4,659 Bytes
# 0a995b0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import os
import pandas as pd
import streamlit as st
from pandasai import SmartDataframe, SmartDatalake
from pandasai.llm import GooglePalm
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_experimental.agents import create_pandas_dataframe_agent
# from pandasai.llm.openai import OpenAI
from langchain_groq import ChatGroq


def read_csv_file(uploaded_file):
    """Load an uploaded CSV into a DataFrame, retrying with latin1 decoding.

    The file is first parsed with pandas' default (UTF-8) decoding. If that
    raises ``UnicodeDecodeError`` a warning is shown and the file is parsed
    again as latin1.

    Args:
        uploaded_file: File-like object (or path) accepted by ``pd.read_csv``.

    Returns:
        The parsed ``DataFrame``, or ``None`` if the file could not be decoded
        with either encoding (an error is shown to the user in that case).
    """
    try:
        return pd.read_csv(uploaded_file)
    except UnicodeDecodeError:
        st.warning("UTF-8 encoding failed. Trying latin1 encoding.")

    # The failed attempt has already consumed part (or all) of the stream;
    # rewind so the retry parses the file from its first byte instead of
    # from wherever the first parse stopped.
    if hasattr(uploaded_file, "seek"):
        uploaded_file.seek(0)

    try:
        return pd.read_csv(uploaded_file, encoding='latin1')
    except UnicodeDecodeError:
        st.error("Unable to read the file with both UTF-8 and ISO-8859-1 encodings.")
        return None


# Prefix PandasAI puts on the string it returns when it could not answer.
_PANDASAI_ERROR_PREFIX = (
    "Unfortunately, I was not able to answer your question, "
    "because of the following error:"
)


def _build_query_engine(frames, llm):
    """Wrap the loaded frames in the PandasAI object matching their count."""
    # PandasAI's config option is the lowercase "llm"; the same config is
    # used for one or two frames so both paths query the same model.
    config = {"llm": llm, "conversational": True, "verbose": True}
    if len(frames) > 1:
        return SmartDatalake(frames, config=config)
    return SmartDataframe(frames[0], config=config)


def _ask_fallback_agent(query, frames, google_api_key):
    """Answer the query with a LangChain pandas agent backed by Gemini."""
    llm = ChatGoogleGenerativeAI(
        model="gemini-pro", verbose=True, google_api_key=google_api_key
    )
    # The agent accepts either a single DataFrame or a list of them.
    # NOTE(review): recent langchain_experimental releases may require
    # allow_dangerous_code=True here -- confirm against the pinned version.
    agent = create_pandas_dataframe_agent(
        llm,
        frames[0] if len(frames) == 1 else frames,
        verbose=True,
    )
    return agent.invoke(query)["output"]


def main():
    """Run the Streamlit app: upload CSV files, query them, show history.

    The user uploads one or two CSV files, which are previewed and wrapped in
    a PandasAI object backed by a Groq-hosted model. Submitted queries are
    answered through PandasAI; when PandasAI reports a failure and a Google
    API key is available, the query is retried with a LangChain pandas agent.
    Every query and the answer shown for it are kept in the session state and
    listed under "Query History".

    Environment variables:
        GROQ_API_KEY: required, used for the primary model.
        GOOGLE_API_KEY: optional, enables the fallback agent.
    """
    st.set_page_config(page_title="Smart Data Query App", layout="wide")

    st.title("Smart Data Query App")

    # Choice to upload one or two CSV files
    file_count = st.radio("How many CSV files would you like to upload?", (1, 2))

    col1, col2 = st.columns(2)
    with col1:
        uploaded_file1 = st.file_uploader("Choose the first CSV file", type="csv", key="file1")
    uploaded_file2 = None
    if file_count == 2:
        with col2:
            uploaded_file2 = st.file_uploader("Choose the second CSV file", type="csv", key="file2")

    if uploaded_file1 is None and uploaded_file2 is None:
        st.write("Please upload at least one CSV file.")
        return

    df1 = read_csv_file(uploaded_file1) if uploaded_file1 is not None else None
    df2 = read_csv_file(uploaded_file2) if uploaded_file2 is not None else None

    # Work with whichever frames actually loaded, so uploading only the
    # second file (or losing one file to a read error) still works.
    frames = [df for df in (df1, df2) if df is not None]
    if not frames:
        st.error("Failed to read one or both CSV files. Please check the files and try again.")
        return

    if df1 is not None:
        with st.expander("Preview First CSV File"):
            st.dataframe(df1.head())
    if df2 is not None:
        with st.expander("Preview Second CSV File"):
            st.dataframe(df2.head())

    # Report a missing key in the UI instead of crashing with a KeyError.
    groq_api = os.environ.get('GROQ_API_KEY')
    if not groq_api:
        st.error("The GROQ_API_KEY environment variable is not set.")
        return
    google_api = os.environ.get('GOOGLE_API_KEY')

    # Set up the ChatGroq model
    llm = ChatGroq(
        groq_api_key=groq_api,
        model_name='mixtral-8x7b-32768'
    )

    engine = _build_query_engine(frames, llm)

    query = st.text_input("Enter your query:")
    submitted = st.button("Submit")

    if submitted:
        if query:
            answer = engine.chat(query)

            # PandasAI may return non-string results (numbers, DataFrames);
            # only a string can carry its error message.
            failed = isinstance(answer, str) and _PANDASAI_ERROR_PREFIX in answer
            if failed:
                if google_api:
                    answer = _ask_fallback_agent(query, frames, google_api)
                else:
                    st.warning(
                        "The fallback model is unavailable because the "
                        "GOOGLE_API_KEY environment variable is not set."
                    )

            st.write("Response:")
            st.write(answer)

            # Record the answer that was actually shown, including one that
            # came from the fallback agent.
            if 'query_history' not in st.session_state:
                st.session_state.query_history = []
            st.session_state.query_history.append((query, answer))
        else:
            st.write("Please enter a query.")

    # Display query history
    if 'query_history' in st.session_state and st.session_state.query_history:
        st.subheader("Query History")
        for q, r in st.session_state.query_history:
            st.write(f"**Query:** {q}")
            st.write(f"**Response:** {r}")
            st.write("---")


# Call main() only when this module is run as the main program, not when it
# is imported.
if __name__ == "__main__":
    main()