eagle0504 committed on
Commit
74de699
·
verified ·
1 Parent(s): a470fe1

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. app.py +343 -0
  2. requirements.txt +7 -0
app.py ADDED
@@ -0,0 +1,343 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import matplotlib.pyplot as plt
5
+ import seaborn as sns
6
+ import plotly.express as px
7
+ import plotly.graph_objects as go
8
+ from io import StringIO
9
+ import sys
10
+ import traceback
11
+ from contextlib import redirect_stdout, redirect_stderr
12
+ import openai
13
+ from openai import OpenAI
14
+ import re
15
+ import warnings
16
# Suppress every Python warning for the lifetime of the app process.
warnings.filterwarnings('ignore')

# Set page config
st.set_page_config(
    page_title="CSV Chat Assistant",
    # The icon was a mis-encoded byte sequence; restored to the bar-chart emoji.
    page_icon="📊",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Custom CSS: gradient page title (.main-header), spacing for alerts
# (.stAlert) and a tinted box for captured program output (.code-output).
st.markdown("""
<style>
.main-header {
font-size: 3rem;
font-weight: bold;
text-align: center;
margin-bottom: 2rem;
background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
}
.stAlert {
margin-top: 1rem;
}
.code-output {
background-color: #f0f2f6;
padding: 1rem;
border-radius: 0.5rem;
border-left: 3px solid #667eea;
}
</style>
""", unsafe_allow_html=True)
49
+
50
def execute_python_code(code, df):
    """Run a Python snippet against a copy of ``df`` and capture what it prints.

    The snippet is executed with ``exec`` in a fresh namespace exposing ``df``
    (a copy of the caller's dataframe) plus ``pd``, ``np``, ``plt``, ``sns``,
    ``px``, ``go`` and ``st``.

    SECURITY: this is not a sandbox. The snippet receives the full set of
    builtins and runs inside the app process, so it can import modules, touch
    the filesystem, and so on. Only execute code you are prepared to trust.

    Args:
        code: Python source text to execute.
        df: Dataframe exposed to the snippet as ``df``. It is copied first so
            in-place edits made by the snippet do not reach the caller's object.

    Returns:
        A 4-tuple ``(output, error, exception, result_df)``:

        * ``output`` - text written to stdout (whatever was printed before the
          failure, if the snippet raised).
        * ``error`` - text written to stderr; on failure the exception message
          and traceback are appended.
        * ``exception`` - ``None`` on success, otherwise a non-empty string
          describing the exception.
        * ``result_df`` - the object bound to ``df`` in the snippet's namespace
          after a successful run, or the working copy if the snippet failed.
    """
    working_df = df.copy()

    stdout_capture = StringIO()
    stderr_capture = StringIO()

    # Global namespace handed to the snippet.
    namespace = {
        'df': working_df,
        'pd': pd,
        'np': np,
        'plt': plt,
        'sns': sns,
        'px': px,
        'go': go,
        'st': st,
        '__builtins__': __builtins__,
    }

    try:
        # The context managers swap sys.stdout / sys.stderr process-wide and
        # restore them on exit, including when the snippet raises.
        with redirect_stdout(stdout_capture), redirect_stderr(stderr_capture):
            exec(code, namespace)
    except Exception as e:
        error = stderr_capture.getvalue() + f"\nError: {str(e)}\n{traceback.format_exc()}"
        # Some exceptions have an empty message (e.g. a bare ``assert``); fall
        # back to the class name so callers that test the marker for
        # truthiness still see the failure.
        failure = str(e) or type(e).__name__
        return stdout_capture.getvalue(), error, failure, working_df

    return (
        stdout_capture.getvalue(),
        stderr_capture.getvalue(),
        None,
        namespace.get('df', working_df),
    )
95
+
96
def generate_python_code(user_query, df_info, api_key, model="gpt-3.5-turbo"):
    """Ask an OpenAI chat model to turn a natural-language request into Python.

    Args:
        user_query: The user's question or instruction, inserted verbatim into
            the prompt.
        df_info: Mapping describing the dataframe. The keys ``'columns'``,
            ``'shape'``, ``'dtypes'`` and ``'sample'`` are read.
        api_key: OpenAI API key used to create the client.
        model: Chat model name passed to the completions endpoint.

    Returns:
        The generated source code with markdown code fences removed, or
        ``None`` if the request failed or the model returned no text. Failures
        are reported to the user through ``st.error`` rather than raised.
    """
    try:
        client = OpenAI(api_key=api_key)

        prompt = f"""
You are a Python code generator for data analysis. Generate Python code based on the user's request.

Dataset Information:
- Columns: {list(df_info['columns'])}
- Shape: {df_info['shape']}
- Data types: {df_info['dtypes']}
- Sample data (first few rows): {df_info['sample']}

User Query: {user_query}

Guidelines:
1. The dataframe is already loaded as 'df'
2. Use pandas, numpy, matplotlib, seaborn, or plotly as needed
3. For visualizations, use st.pyplot(plt.gcf()) for matplotlib/seaborn or st.plotly_chart() for plotly
4. Print results using print() statements
5. Keep code concise and focused on the user's request
6. If creating plots, make sure to show them in Streamlit
7. Handle missing values appropriately
8. Use descriptive variable names

Return ONLY the Python code, no explanations or markdown formatting.
"""

        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "You are a helpful data analysis assistant that generates Python code."},
                {"role": "user", "content": prompt},
            ],
            temperature=0.1,
            max_tokens=1000,
        )

        # The message content may be None; check before calling str methods
        # so the user sees a meaningful message instead of an AttributeError.
        raw_code = response.choices[0].message.content
        if not raw_code or not raw_code.strip():
            st.error("Error generating code: the model returned an empty response")
            return None

        # Clean up the code: drop markdown fences together with any language
        # tag that follows them (```python, ```py, ...), not just "python".
        code = re.sub(r'```[A-Za-z0-9_+-]*[ \t]*\n?', '', raw_code.strip())

        return code

    except Exception as e:
        # UI boundary: any client, network or response-shape failure is
        # surfaced to the user and signalled to the caller with None.
        st.error(f"Error generating code: {str(e)}")
        return None
146
+
147
def get_dataframe_info(df):
    """Summarise a dataframe as a dictionary of plain Python structures.

    The result holds the column names, the shape, the per-column dtypes, the
    first three rows, ``describe()`` statistics (an empty dict when the frame
    has no numeric columns) and the number of nulls per column.
    """
    numeric_columns = df.select_dtypes(include=[np.number]).columns
    has_numeric = len(numeric_columns) > 0

    first_rows = df.head(3)
    missing_per_column = df.isnull().sum()

    # describe() is only consulted when at least one numeric column exists.
    if has_numeric:
        statistics = df.describe().to_dict()
    else:
        statistics = {}

    info = {}
    info['columns'] = df.columns.tolist()
    info['shape'] = df.shape
    info['dtypes'] = df.dtypes.to_dict()
    info['sample'] = first_rows.to_dict()
    info['describe'] = statistics
    info['null_counts'] = missing_per_column.to_dict()
    return info
157
+
158
def _render_sidebar():
    """Draw the sidebar widgets and return ``(api_key, uploaded_file)``."""
    with st.sidebar:
        st.header("🔧 Configuration")

        # API Key input
        st.subheader("🔑 OpenAI API Key")
        api_key = st.text_input(
            "Enter your OpenAI API Key:",
            type="password",
            help="Get your API key from https://platform.openai.com/api-keys",
        )

        if api_key:
            st.success("✅ API Key provided")
        else:
            st.warning("⚠️ Please enter your OpenAI API Key")

        st.divider()

        # File upload
        st.subheader("📁 Upload CSV File")
        uploaded_file = st.file_uploader(
            "Drag and drop your CSV file here:",
            type=['csv'],
            help="Upload a CSV file to start analyzing your data",
        )

        if uploaded_file is not None:
            st.success(f"✅ File uploaded: {uploaded_file.name}")

            # Display file info
            file_details = {
                "Filename": uploaded_file.name,
                "File size": f"{uploaded_file.size} bytes",
            }
            st.json(file_details)

    return api_key, uploaded_file


def _render_dataset_overview(df):
    """Show the first rows of ``df`` beside its shape and column lists."""
    preview_col, info_col = st.columns([2, 1])

    with preview_col:
        st.subheader("📋 Dataset Overview")
        st.dataframe(df.head(), use_container_width=True)

    with info_col:
        st.subheader("📊 Dataset Info")
        st.write(f"**Shape:** {df.shape}")
        st.write(f"**Columns:** {len(df.columns)}")
        st.write(f"**Rows:** {len(df)}")

        numeric_columns = df.select_dtypes(include=[np.number]).columns
        if len(numeric_columns) > 0:
            st.write("**Numerical Columns:**")
            for col in numeric_columns:
                st.write(f"- {col}")

        text_columns = df.select_dtypes(include=['object']).columns
        if len(text_columns) > 0:
            st.write("**Text Columns:**")
            for col in text_columns:
                st.write(f"- {col}")


def _render_chat(df, api_key):
    """Draw the example buttons, the query box and the Generate / Clear buttons."""
    st.subheader("💬 Chat with Your Data")
    st.write("Ask questions about your data in natural language. Examples:")

    # Example queries
    examples = [
        "Show me the first 10 rows",
        "What are the summary statistics?",
        "Create a histogram of [column_name]",
        "Show correlation between columns",
        "Find rows where [column] > [value]",
        "Create a scatter plot of X vs Y",
        "Group by [column] and show counts",
    ]

    example_cols = st.columns(3)
    for i, example in enumerate(examples):
        with example_cols[i % 3]:
            if st.button(example, key=f"example_{i}"):
                # Stored before the text area below is created, so the chosen
                # example becomes its initial value on this same run.
                st.session_state['user_query'] = example

    # Chat input
    user_query = st.text_area(
        "Ask a question about your data:",
        value=st.session_state.get('user_query', ''),
        height=100,
        placeholder="e.g., 'Show me a bar chart of the top 10 values in the sales column'",
    )

    run_col, clear_col, _spacer = st.columns([1, 1, 4])
    with run_col:
        if st.button("🚀 Generate & Run", type="primary") and user_query.strip():
            with st.spinner("Generating Python code..."):
                df_info = get_dataframe_info(df)
                code = generate_python_code(user_query, df_info, api_key)

            if code:
                st.session_state['generated_code'] = code
                st.session_state['user_query'] = user_query
                st.rerun()

    with clear_col:
        if st.button("🗑️ Clear"):
            st.session_state.pop('generated_code', None)
            st.session_state.pop('user_query', None)
            st.rerun()


def _render_results(df):
    """Show the stored generated code and the outcome of executing it on ``df``."""
    generated_code = st.session_state['generated_code']

    st.divider()
    code_col, result_col = st.columns([1, 1])

    with code_col:
        st.subheader("🐍 Generated Python Code")
        st.code(generated_code, language='python')
        # The previous "Copy Code" button only printed a success message and
        # never copied anything; point at the code block's own copy control.
        st.caption("Use the copy icon on the code block above to copy the code.")

    with result_col:
        st.subheader("🎯 Results")

        try:
            # Execute the code
            with st.spinner("Executing code..."):
                output, error, exception, _result_df = execute_python_code(
                    generated_code,
                    df,
                )

            if exception:
                st.error(f"**Error occurred:**\n{error}")
            else:
                if output:
                    # NOTE(review): these wrapper tags are emitted as separate
                    # elements and probably do not enclose the text between
                    # them - confirm, and restyle if the box is wanted.
                    st.markdown('<div class="code-output">', unsafe_allow_html=True)
                    st.text(output)
                    st.markdown('</div>', unsafe_allow_html=True)

                if error:
                    st.warning(f"**Warnings:**\n{error}")

                # Fallback rendering for figures the generated code created
                # but did not display. Skipped when the code calls st.pyplot
                # itself, since those figures would otherwise appear twice
                # (heuristic: plain substring match on the source text).
                if 'st.pyplot' not in generated_code:
                    for fig_num in plt.get_fignums():
                        st.pyplot(plt.figure(fig_num))
        finally:
            # Close every figure, also after a failure, so figures do not
            # pile up in matplotlib's global state across reruns.
            plt.close('all')


def _render_workspace(uploaded_file, api_key):
    """Load the uploaded CSV and draw the overview, chat and result sections."""
    # Only the CSV parse is guarded, so unrelated failures further down are
    # no longer reported as CSV loading errors.
    try:
        df = pd.read_csv(uploaded_file)
    except Exception as e:
        st.error(f"Error loading CSV file: {str(e)}")
        st.write("Please make sure your CSV file is properly formatted.")
        return

    # Store dataframe in session state
    st.session_state['df'] = df
    st.session_state['api_key'] = api_key

    _render_dataset_overview(df)

    st.divider()

    _render_chat(df, api_key)

    # Display generated code and results
    if 'generated_code' in st.session_state:
        _render_results(df)


def _render_footer():
    """Draw the divider and the centred credit line at the bottom of the page."""
    st.divider()
    st.markdown("""
<div style="text-align: center; color: #666;">
<p>Built with ❤️ using Streamlit | Powered by OpenAI GPT-3.5</p>
</div>
""", unsafe_allow_html=True)


def main():
    """Render the CSV Chat Assistant page.

    Draws the header and sidebar, then either the data workspace (when both
    an API key and a CSV file are provided) or a hint about what is still
    missing, followed by the footer.
    """
    # Header
    st.markdown('<h1 class="main-header">📊 CSV Chat Assistant</h1>', unsafe_allow_html=True)
    st.markdown("Upload your CSV file and chat with your data using natural language!")

    api_key, uploaded_file = _render_sidebar()

    # Main content
    if uploaded_file is not None and api_key:
        _render_workspace(uploaded_file, api_key)
    elif uploaded_file is None:
        st.info("👆 Please upload a CSV file in the sidebar to get started.")
    else:
        st.info("🔑 Please enter your OpenAI API key in the sidebar to use the chat feature.")

    _render_footer()


if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ streamlit>=1.28.0
2
+ pandas>=2.0.0
3
+ numpy>=1.24.0
4
+ matplotlib>=3.7.0
5
+ seaborn>=0.12.0
6
+ plotly>=5.15.0
7
+ openai>=1.3.0