Poorva Malviya committed on
Commit
e7915f7
·
1 Parent(s): 474c6f0

random changes

Browse files
Files changed (1) hide show
  1. app.py +59 -57
app.py CHANGED
@@ -24,8 +24,8 @@ def main():
24
 
25
  if st.session_state.step == 1:
26
  step_1_upload_and_analyze()
27
- elif st.session_state.step == 2:
28
- step_2_clean_data()
29
  elif st.session_state.step == 3:
30
  step_3_chat_with_data()
31
 
@@ -49,63 +49,63 @@ def step_1_upload_and_analyze():
49
  st.write(df.head())
50
  st.write("---")
51
 
52
- if st.button("Proceed to Data Cleaning"):
53
- st.session_state.step = 2
54
 
55
- def step_2_clean_data():
56
- st.subheader("Step 2: Clean Data")
57
 
58
- llm = OpenAI(temperature=0)
59
 
60
- for name, df in st.session_state.dataframes.items():
61
- st.write(f"Cleaning recommendations for {name}:")
62
 
63
- # Create a summary of the dataframe
64
- summary = f"Dataframe '{name}' summary:\n"
65
- summary += f"- Shape: {df.shape}\n"
66
- summary += f"- Columns: {', '.join(df.columns)}\n"
67
- summary += "- Data types:\n"
68
- for col, dtype in df.dtypes.items():
69
- summary += f" - {col}: {dtype}\n"
70
- summary += "- Sample data (first 5 rows):\n"
71
- summary += df.head().to_string()
72
-
73
- # Split the summary into smaller chunks
74
- chunk_size = 1500 # Reduced chunk size
75
- chunks = textwrap.wrap(summary, chunk_size)
76
-
77
- cleaning_recommendations = []
78
- with st.spinner("Analyzing data and generating recommendations..."):
79
- for i, chunk in enumerate(chunks):
80
- chunk_result = analyze_chunk(llm, df, chunk)
81
- cleaning_recommendations.append(chunk_result)
82
-
83
- # Combine all recommendations
84
- full_recommendations = "\n".join(cleaning_recommendations)
85
- st.write(full_recommendations)
86
 
87
- # Create checkboxes for cleaning operations
88
- cleaning_ops = [op.strip() for op in full_recommendations.split('\n') if op.strip()]
89
- st.session_state.cleaning_operations[name] = []
90
- for op in cleaning_ops:
91
- if st.checkbox(op, key=f"{name}_{op}"):
92
- st.session_state.cleaning_operations[name].append(op)
93
-
94
- if st.button("Apply Cleaning and Proceed to Chat"):
95
- for name, ops in st.session_state.cleaning_operations.items():
96
- df = st.session_state.dataframes[name]
97
- for op in ops:
98
- # Here you would implement the actual cleaning operations
99
- # For now, we'll just print what would be done
100
- st.write(f"Applying to {name}: {op}")
101
 
102
- st.session_state.step = 3
103
- st.success("Cleaning operations applied. Proceeding to chat interface.")
104
- st.button("Go to Chat Interface")
105
 
106
- if st.button("Back to Data Upload"):
107
- st.session_state.step = 1
108
- st.experimental_rerun()
109
 
110
  def step_3_chat_with_data():
111
  st.subheader("Step 3: Chat with your data")
@@ -116,11 +116,11 @@ def step_3_chat_with_data():
116
  st.session_state.chat_history.append(("User", user_input))
117
  st.session_state.chat_history.append(("AI", response))
118
 
119
- for role, message in st.session_state.chat_history:
120
  if role == "User":
121
  st.text_area("You:", value=message, height=50, disabled=True)
122
  else:
123
- st.text_area("AI:", value=message, height=100, disabled=True)
124
 
125
  def process_user_input(user_input):
126
  llm = OpenAI(temperature=0)
@@ -136,10 +136,12 @@ def process_user_input(user_input):
136
  combined_df,
137
  verbose=True,
138
  agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
139
- allow_dangerous_code=True
 
140
  )
141
 
142
- full_input = f"{df_summary}\nThe data from all files has been combined into a single DataFrame with an additional 'source' column indicating the original file.\n\nUser question: {user_input}"
 
143
 
144
  response = agent.run(full_input)
145
  return response
@@ -163,4 +165,4 @@ def analyze_chunk(llm, df, chunk, timeout=30):
163
  return f"Analysis timed out or encountered an error: {str(e)}"
164
 
165
  if __name__ == "__main__":
166
- main()
 
24
 
25
  if st.session_state.step == 1:
26
  step_1_upload_and_analyze()
27
+ # elif st.session_state.step == 2:
28
+ # step_2_clean_data()
29
  elif st.session_state.step == 3:
30
  step_3_chat_with_data()
31
 
 
49
  st.write(df.head())
50
  st.write("---")
51
 
52
+ if st.button("Explore Data"):
53
+ st.session_state.step = 3
54
 
55
+ # def step_2_clean_data():
56
+ # st.subheader("Step 2: Clean Data")
57
 
58
+ # llm = OpenAI(temperature=0)
59
 
60
+ # for name, df in st.session_state.dataframes.items():
61
+ # st.write(f"Cleaning recommendations for {name}:")
62
 
63
+ # # Create a summary of the dataframe
64
+ # summary = f"Dataframe '{name}' summary:\n"
65
+ # summary += f"- Shape: {df.shape}\n"
66
+ # summary += f"- Columns: {', '.join(df.columns)}\n"
67
+ # summary += "- Data types:\n"
68
+ # for col, dtype in df.dtypes.items():
69
+ # summary += f" - {col}: {dtype}\n"
70
+ # summary += "- Sample data (first 5 rows):\n"
71
+ # summary += df.head().to_string()
72
+
73
+ # # Split the summary into smaller chunks
74
+ # chunk_size = 1500 # Reduced chunk size
75
+ # chunks = textwrap.wrap(summary, chunk_size)
76
+
77
+ # cleaning_recommendations = []
78
+ # with st.spinner("Analyzing data and generating recommendations..."):
79
+ # for i, chunk in enumerate(chunks):
80
+ # chunk_result = analyze_chunk(llm, df, chunk)
81
+ # cleaning_recommendations.append(chunk_result)
82
+
83
+ # # Combine all recommendations
84
+ # full_recommendations = "\n".join(cleaning_recommendations)
85
+ # st.write(full_recommendations)
86
 
87
+ # # Create checkboxes for cleaning operations
88
+ # cleaning_ops = [op.strip() for op in full_recommendations.split('\n') if op.strip()]
89
+ # st.session_state.cleaning_operations[name] = []
90
+ # for op in cleaning_ops:
91
+ # if st.checkbox(op, key=f"{name}_{op}"):
92
+ # st.session_state.cleaning_operations[name].append(op)
93
+
94
+ # if st.button("Apply Cleaning and Proceed to Chat"):
95
+ # for name, ops in st.session_state.cleaning_operations.items():
96
+ # df = st.session_state.dataframes[name]
97
+ # for op in ops:
98
+ # # Here you would implement the actual cleaning operations
99
+ # # For now, we'll just print what would be done
100
+ # st.write(f"Applying to {name}: {op}")
101
 
102
+ # st.session_state.step = 3
103
+ # st.success("Cleaning operations applied. Proceeding to chat interface.")
104
+ # st.button("Go to Chat Interface")
105
 
106
+ # if st.button("Back to Data Upload"):
107
+ # st.session_state.step = 1
108
+ # st.experimental_rerun()
109
 
110
  def step_3_chat_with_data():
111
  st.subheader("Step 3: Chat with your data")
 
116
  st.session_state.chat_history.append(("User", user_input))
117
  st.session_state.chat_history.append(("AI", response))
118
 
119
+ for role, message in reversed(st.session_state.chat_history):
120
  if role == "User":
121
  st.text_area("You:", value=message, height=50, disabled=True)
122
  else:
123
+ st.write(message)
124
 
125
  def process_user_input(user_input):
126
  llm = OpenAI(temperature=0)
 
136
  combined_df,
137
  verbose=True,
138
  agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
139
+ allow_dangerous_code=True,
140
+ handle_parsing_errors=True
141
  )
142
 
143
+ # full_input = f"{df_summary}\nThe data from all files has been combined into a single DataFrame with an additional 'source' column indicating the original file.\n\nUser question: {user_input}"
144
+ full_input = f"{combined_df}\nAs a data analyst, process the data to answer the user question.\n\nUser question: {user_input}"
145
 
146
  response = agent.run(full_input)
147
  return response
 
165
  return f"Analysis timed out or encountered an error: {str(e)}"
166
 
167
  if __name__ == "__main__":
168
+ main()