fizzah90 committed on
Commit
304fb97
·
verified ·
1 Parent(s): 41c1613

initial commit

Browse files
Files changed (2) hide show
  1. app.py +244 -0
  2. utils.py +42 -0
app.py ADDED
@@ -0,0 +1,244 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import matplotlib.pyplot as plt
4
+ import seaborn as sns
5
+ from utils import readData, getAgent
6
+
7
def analyze_data_types(df):
    """Count the numeric and categorical columns of a DataFrame.

    Numeric means dtype ``int64`` or ``float64``; categorical means dtype
    ``object`` or ``category``. Columns of any other dtype (for example
    ``bool``) are counted in neither group.

    Args:
        df: The DataFrame to inspect.

    Returns:
        A ``(num_numeric, num_categorical)`` tuple of column counts.
    """
    numeric_part = df.select_dtypes(include=['int64', 'float64'])
    categorical_part = df.select_dtypes(include=['object', 'category'])
    _, numeric_count = numeric_part.shape
    _, categorical_count = categorical_part.shape
    return numeric_count, categorical_count
11
+
12
# Page-level settings: browser tab title/icon, wide layout, sidebar open.
st.set_page_config(
    page_title="DataBot",
    page_icon="🤖",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Initialize session state for code display only
if 'show_code' not in st.session_state:
    st.session_state.show_code = False

# Single sidebar control for code display
with st.sidebar:
    st.markdown("<h1 style='color: white;'>Settings</h1>", unsafe_allow_html=True)
    show_code = st.checkbox(
        'Show Python Code',
        value=st.session_state.show_code,
        key='code_toggle_single',
    )

# Mirror the checkbox into session state. The chat section reads
# st.session_state.show_code rather than the local variable, so without this
# assignment the flag stays False forever and the toggle has no effect there.
st.session_state.show_code = show_code
29
+
30
+ # Force a dark theme by injecting custom CSS overrides for the app, sidebar, header and widgets
31
+ st.markdown(
32
+ """
33
+ <style>
34
+ .stApp {
35
+ background-color: #000000;
36
+ color: #FFFFFF;
37
+ }
38
+ .stSidebar {
39
+ background-color: #000000;
40
+ color: #FFFFFF;
41
+ }
42
+ header[data-testid="stHeader"] {
43
+ background-color: #000000;
44
+ }
45
+ .stButton button {
46
+ background-color: #4A4A4A;
47
+ color: #FFFFFF;
48
+ border: 1px solid #404040;
49
+ }
50
+ .dataframe {
51
+ background-color: #000000; /* Black background for tables */
52
+ color: #FFFFFF;
53
+ border: 1px solid #404040;
54
+ }
55
+ .stMarkdown h1, .stMarkdown h2, .stMarkdown h3, .stMarkdown h4,
56
+ .stMarkdown h5, .stMarkdown h6 {
57
+ color: #FFFFFF;
58
+ }
59
+ .stTextInput input, .stSelectbox select {
60
+ background-color: #333333;
61
+ color: #FFFFFF;
62
+ border: 1px solid #404040;
63
+ }
64
+ .stFileUploader {
65
+ background-color: #000000; /* Black background for drag-and-drop */
66
+ color: #FFFFFF;
67
+ border: 1px solid #808080; /* Grey border */
68
+ }
69
+ .stCheckbox > div:first-child {
70
+ color: #FFFFFF;
71
+ }
72
+ .stChatMessage {
73
+ background-color: #808080; /* Grey background for text responses */
74
+ color: #FFFFFF;
75
+ border-radius: 8px;
76
+ padding: 8px 12px;
77
+ }
78
+ .stChatMessage-user {
79
+ background-color: #808080; /* Grey background for user responses */
80
+ color: #FFFFFF;
81
+ align-self: flex-end;
82
+ }
83
+ .stChatMessage-assistant {
84
+ background-color: #808080; /* Grey background for assistant responses */
85
+ color: #FFFFFF;
86
+ align-self: flex-start;
87
+ }
88
+ .stChatInput {
89
+ background-color: #808080; /* Grey background for chat input */
90
+ color: #FFFFFF;
91
+ border: 1px solid #404040;
92
+ }
93
+ .stSlider > div > div > div {
94
+ background-color: #4A4A4A;
95
+ }
96
+ .element-container {
97
+ background-color: #000000 !important; /* Black background for plots */
98
+ }
99
+ </style>
100
+ """,
101
+ unsafe_allow_html=True
102
+ )
103
+
104
+ # Single main title
105
+ st.markdown("<h1 style='color: white;'>DataBot: Your AI-Driven Data Analyst 😊</h1>", unsafe_allow_html=True)
106
+
107
+ # Function to update code display
108
def update_code_display(code_snippet, section_key):
    """Show a code snippet in the sidebar when the code toggle is enabled.

    Does nothing when the module-level ``show_code`` flag (the sidebar
    checkbox value) is falsy.

    Args:
        code_snippet: Source text to display, highlighted as Python.
        section_key: Label of the calling section. Kept so existing callers
            keep working, but unused: ``st.code`` is not a widget and accepts
            no ``key`` argument, so forwarding one raised ``TypeError``.
    """
    if show_code:
        st.sidebar.code(code_snippet, language="python")
111
+
112
+ # Theme and Code Display Configuration
113
# Seed one-time session defaults. This runs once per session, on the first
# script execution, when 'theme' is not yet present in session state.
if 'theme' not in st.session_state:
    st.session_state.theme = 'dark'
    st.session_state.show_code = False
    st.session_state.current_code = ""

# Inline HTML wrapper used for every chat message that is not shown as code.
_CHAT_BUBBLE = "<span style='background-color: #808080; color: white;'>{}</span>"


def format_response(response):
    """Turn an agent reply into a string that can be shown in the chat.

    Args:
        response: Whatever the agent returned.

    Returns:
        The HTML table for a DataFrame; for a string, the stripped text after
        the first ``'Action Input:'`` marker when that marker is present,
        otherwise the string unchanged; ``str(response)`` for anything else.
    """
    if isinstance(response, pd.DataFrame):
        return response.to_html()
    if isinstance(response, str):
        _, marker, tail = response.partition('Action Input:')
        return tail.strip() if marker else response
    return str(response)


def _show_chat_text(role, content):
    """Render one chat message body inside the current chat_message container."""
    from html import escape  # local import: keeps this block self-contained

    if role == "assistant" and st.session_state.show_code:
        st.code(content, language="python")
        return
    # User text is escaped because it is interpolated into raw HTML below.
    # Assistant text is left as-is: format_response may return an HTML table
    # that is meant to be rendered.
    body = content if role == "assistant" else escape(content)
    st.markdown(_CHAT_BUBBLE.format(body), unsafe_allow_html=True)


def _render_analysis_tab(df):
    """Show the first rows and the describe() summary of the dataset."""
    st.header("Data Analysis")
    st.dataframe(df.head())
    st.write("Statistical Summary:")
    st.write(df.describe())


def _render_visualization_tab(df):
    """Let the user pick a plot type and columns, then draw the plot.

    Only called when the dataset has at least two int64/float64 columns, so
    the column list used for the multiselect default is never empty.
    """
    st.header("Data Visualization")
    numeric_cols = list(df.select_dtypes(include=['int64', 'float64']).columns)

    col1, col2 = st.columns(2)
    with col1:
        plot_type = st.selectbox("Select Plot Type", ["Bar", "Scatter", "Histogram", "Box", "Line"])
        selected_columns = st.multiselect("Select Columns to Visualize", numeric_cols, default=numeric_cols[0])

    with col2:
        fig_width = st.slider("Plot width", 4, 12, 6)
        fig_height = st.slider("Plot height", 3, 8, 4)
        # The bar-count slider is only shown (and only needed) for bar plots.
        n_bars = st.slider("Number of bars", 5, 50, 20) if plot_type == "Bar" else None

    if not st.button("Generate Plot", key='gen_plot'):
        return

    # Validate the selection up front instead of failing inside the plotting
    # call or silently drawing an empty figure.
    if not selected_columns:
        st.warning("Select at least one column to plot.")
        return
    if plot_type == "Scatter" and len(selected_columns) < 2:
        st.warning("Scatter plots need two columns: the first is used for x, the second for y.")
        return

    fig, ax = plt.subplots(figsize=(fig_width, fig_height))
    try:
        if plot_type == "Bar":
            df[selected_columns].head(n_bars).plot(kind='bar', ax=ax)
        elif plot_type == "Scatter":
            sns.scatterplot(data=df, x=selected_columns[0], y=selected_columns[1], ax=ax)
        elif plot_type == "Box":
            df[selected_columns].boxplot(ax=ax)
        elif plot_type == "Line":
            df[selected_columns].plot(ax=ax)
        elif plot_type == "Histogram":
            df[selected_columns].hist(ax=ax)

        plt.xticks(rotation=45)
        plt.tight_layout()
        st.pyplot(fig)
    except Exception as e:
        st.error(f"Error generating plot: {str(e)}")
    finally:
        # Every click creates a new figure; close it so figures do not
        # accumulate in memory across reruns.
        plt.close(fig)


def _render_chat_tab(df):
    """Replay the stored conversation and answer a new prompt with the agent."""
    st.header("Chat with your Data")

    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Display chat history
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            _show_chat_text(message["role"], message["content"])

    # Chat input and response
    if prompt := st.chat_input("Ask about your data"):
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            _show_chat_text("user", prompt)

        with st.chat_message("assistant"):
            try:
                agent = getAgent(df)
                formatted_response = format_response(agent.run(prompt))
                _show_chat_text("assistant", formatted_response)

                st.session_state.messages.append({
                    "role": "assistant",
                    "content": formatted_response
                })
                update_code_display("agent.run(prompt)", "chat")
            except Exception as e:
                st.error(f"Error: {str(e)}")


def _render_dashboard(df):
    """Show the sidebar overview and either a plain preview or the three tabs."""
    num_numeric, num_categorical = analyze_data_types(df)

    # Sidebar stats
    st.sidebar.markdown("<h1 style='color: white;'>Data Overview 📊</h1>", unsafe_allow_html=True)
    st.sidebar.write(f"Total columns: {df.shape[1]}")
    st.sidebar.write(f"Total rows: {df.shape[0]}")
    st.sidebar.write(f"Numeric columns: {num_numeric}")
    st.sidebar.write(f"Categorical columns: {num_categorical}")

    if num_numeric <= 1:
        st.warning("This dataset is mostly descriptive. Limited statistical analysis available.")
        st.write("Data Preview:")
        st.dataframe(df.head())
        st.write("Data Types:")
        st.write(df.dtypes)
        return

    # From here on num_numeric >= 2, so the tabs need no further
    # "enough numeric columns?" checks.
    analysis_tab, visualization_tab, chat_tab = st.tabs(["Analysis", "Visualization", "Chat"])
    with analysis_tab:
        _render_analysis_tab(df)
    with visualization_tab:
        _render_visualization_tab(df)
    with chat_tab:
        _render_chat_tab(df)


# File uploader
uploaded_file = st.file_uploader("Upload a CSV file", type="csv")

df = None
if uploaded_file is None:
    st.info("Please upload a CSV file to begin analysis")
else:
    # Only the read itself is reported as a loading problem.
    try:
        df = readData(uploaded_file)
    except Exception as e:
        st.error(f"Error loading file: {str(e)}")

if df is not None:
    # Failures after a successful load are reported generically rather than
    # being mislabelled as file-loading errors.
    try:
        _render_dashboard(df)
    except Exception as e:
        st.error(f"Error: {str(e)}")
utils.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from langchain_experimental.agents import create_pandas_dataframe_agent
3
+ from langchain_google_genai import ChatGoogleGenerativeAI
4
+ import pandas as pd
5
+ from dotenv import load_dotenv
6
+
7
+ load_dotenv()
8
+
9
def format_agent_output(output):
    """Normalize an agent result to a DataFrame or a string.

    Args:
        output: The raw value produced by the agent.

    Returns:
        * ``output`` itself when it is already a DataFrame.
        * A DataFrame built from the Python literal that follows the last
          ``'Input:'`` marker, when ``output`` is a string mentioning
          ``'DataFrame'`` or ``'describe'`` and that text parses as a literal
          pandas can turn into a frame.
        * The string unchanged when it cannot be converted that way.
        * ``str(output)`` for any other type.
    """
    import ast  # local import: only needed for the literal parsing below

    if isinstance(output, pd.DataFrame):
        return output
    if not isinstance(output, str):
        return str(output)

    if 'DataFrame' in output or 'describe' in output:
        payload = output.split('Input:')[-1].strip()
        try:
            # SECURITY: this text is model-generated. The previous eval()
            # would execute arbitrary code embedded in it; literal_eval only
            # accepts plain Python literals (dicts, lists, numbers, strings).
            return pd.DataFrame(ast.literal_eval(payload))
        except (ValueError, SyntaxError, TypeError):
            # Ordinary prose that merely mentions "DataFrame"/"describe" is
            # not a literal; return it as text instead of crashing.
            return output
    return output
17
+
18
def readData(path):
    """Load a CSV into a DataFrame.

    Args:
        path: Anything ``pandas.read_csv`` accepts (a path or a file-like
            object).

    Returns:
        The parsed DataFrame.

    Raises:
        Exception: If reading fails. The message is prefixed with
            ``"Error reading data: "`` and the original error is attached as
            ``__cause__`` so its type and traceback are not lost.
    """
    try:
        return pd.read_csv(path)
    except Exception as e:
        raise Exception(f"Error reading data: {str(e)}") from e
24
+
25
def getAgent(data):
    """Build a LangChain pandas-DataFrame agent backed by a Gemini chat model.

    The API key is read from the ``GOOGLE_API_KEY`` environment variable.

    Args:
        data: The DataFrame handed to ``create_pandas_dataframe_agent``.

    Returns:
        The agent object created by ``create_pandas_dataframe_agent``.

    Raises:
        Exception: If the model or the agent cannot be created. The message
            is prefixed with ``"Error creating agent: "`` and the original
            error is attached as ``__cause__``.
    """
    try:
        llm = ChatGoogleGenerativeAI(
            model="gemini-pro",
            temperature=0.5,
            google_api_key=os.environ.get("GOOGLE_API_KEY"),
        )

        # NOTE: allow_dangerous_code=True opts in to the agent executing
        # model-generated Python code.
        return create_pandas_dataframe_agent(
            llm,
            data,
            verbose=True,
            handle_parsing_errors=True,
            allow_dangerous_code=True,  # Enable code execution
        )
    except Exception as e:
        raise Exception(f"Error creating agent: {str(e)}") from e