Poorva Malviya committed on
Commit ·
e7915f7
1
Parent(s): 474c6f0
random changes
Browse files
app.py
CHANGED
|
@@ -24,8 +24,8 @@ def main():
|
|
| 24 |
|
| 25 |
if st.session_state.step == 1:
|
| 26 |
step_1_upload_and_analyze()
|
| 27 |
-
elif st.session_state.step == 2:
|
| 28 |
-
|
| 29 |
elif st.session_state.step == 3:
|
| 30 |
step_3_chat_with_data()
|
| 31 |
|
|
@@ -49,63 +49,63 @@ def step_1_upload_and_analyze():
|
|
| 49 |
st.write(df.head())
|
| 50 |
st.write("---")
|
| 51 |
|
| 52 |
-
if st.button("
|
| 53 |
-
st.session_state.step =
|
| 54 |
|
| 55 |
-
def step_2_clean_data():
|
| 56 |
-
|
| 57 |
|
| 58 |
-
|
| 59 |
|
| 60 |
-
|
| 61 |
-
|
| 62 |
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
|
| 110 |
def step_3_chat_with_data():
|
| 111 |
st.subheader("Step 3: Chat with your data")
|
|
@@ -116,11 +116,11 @@ def step_3_chat_with_data():
|
|
| 116 |
st.session_state.chat_history.append(("User", user_input))
|
| 117 |
st.session_state.chat_history.append(("AI", response))
|
| 118 |
|
| 119 |
-
for role, message in st.session_state.chat_history:
|
| 120 |
if role == "User":
|
| 121 |
st.text_area("You:", value=message, height=50, disabled=True)
|
| 122 |
else:
|
| 123 |
-
st.
|
| 124 |
|
| 125 |
def process_user_input(user_input):
|
| 126 |
llm = OpenAI(temperature=0)
|
|
@@ -136,10 +136,12 @@ def process_user_input(user_input):
|
|
| 136 |
combined_df,
|
| 137 |
verbose=True,
|
| 138 |
agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
|
| 139 |
-
allow_dangerous_code=True
|
|
|
|
| 140 |
)
|
| 141 |
|
| 142 |
-
|
|
|
|
| 143 |
|
| 144 |
response = agent.run(full_input)
|
| 145 |
return response
|
|
@@ -163,4 +165,4 @@ def analyze_chunk(llm, df, chunk, timeout=30):
|
|
| 163 |
return f"Analysis timed out or encountered an error: {str(e)}"
|
| 164 |
|
| 165 |
if __name__ == "__main__":
|
| 166 |
-
main()
|
|
|
|
| 24 |
|
| 25 |
if st.session_state.step == 1:
|
| 26 |
step_1_upload_and_analyze()
|
| 27 |
+
# elif st.session_state.step == 2:
|
| 28 |
+
# step_2_clean_data()
|
| 29 |
elif st.session_state.step == 3:
|
| 30 |
step_3_chat_with_data()
|
| 31 |
|
|
|
|
| 49 |
st.write(df.head())
|
| 50 |
st.write("---")
|
| 51 |
|
| 52 |
+
if st.button("Explore Data"):
|
| 53 |
+
st.session_state.step = 3
|
| 54 |
|
| 55 |
+
# def step_2_clean_data():
|
| 56 |
+
# st.subheader("Step 2: Clean Data")
|
| 57 |
|
| 58 |
+
# llm = OpenAI(temperature=0)
|
| 59 |
|
| 60 |
+
# for name, df in st.session_state.dataframes.items():
|
| 61 |
+
# st.write(f"Cleaning recommendations for {name}:")
|
| 62 |
|
| 63 |
+
# # Create a summary of the dataframe
|
| 64 |
+
# summary = f"Dataframe '{name}' summary:\n"
|
| 65 |
+
# summary += f"- Shape: {df.shape}\n"
|
| 66 |
+
# summary += f"- Columns: {', '.join(df.columns)}\n"
|
| 67 |
+
# summary += "- Data types:\n"
|
| 68 |
+
# for col, dtype in df.dtypes.items():
|
| 69 |
+
# summary += f" - {col}: {dtype}\n"
|
| 70 |
+
# summary += "- Sample data (first 5 rows):\n"
|
| 71 |
+
# summary += df.head().to_string()
|
| 72 |
+
|
| 73 |
+
# # Split the summary into smaller chunks
|
| 74 |
+
# chunk_size = 1500 # Reduced chunk size
|
| 75 |
+
# chunks = textwrap.wrap(summary, chunk_size)
|
| 76 |
+
|
| 77 |
+
# cleaning_recommendations = []
|
| 78 |
+
# with st.spinner("Analyzing data and generating recommendations..."):
|
| 79 |
+
# for i, chunk in enumerate(chunks):
|
| 80 |
+
# chunk_result = analyze_chunk(llm, df, chunk)
|
| 81 |
+
# cleaning_recommendations.append(chunk_result)
|
| 82 |
+
|
| 83 |
+
# # Combine all recommendations
|
| 84 |
+
# full_recommendations = "\n".join(cleaning_recommendations)
|
| 85 |
+
# st.write(full_recommendations)
|
| 86 |
|
| 87 |
+
# # Create checkboxes for cleaning operations
|
| 88 |
+
# cleaning_ops = [op.strip() for op in full_recommendations.split('\n') if op.strip()]
|
| 89 |
+
# st.session_state.cleaning_operations[name] = []
|
| 90 |
+
# for op in cleaning_ops:
|
| 91 |
+
# if st.checkbox(op, key=f"{name}_{op}"):
|
| 92 |
+
# st.session_state.cleaning_operations[name].append(op)
|
| 93 |
+
|
| 94 |
+
# if st.button("Apply Cleaning and Proceed to Chat"):
|
| 95 |
+
# for name, ops in st.session_state.cleaning_operations.items():
|
| 96 |
+
# df = st.session_state.dataframes[name]
|
| 97 |
+
# for op in ops:
|
| 98 |
+
# # Here you would implement the actual cleaning operations
|
| 99 |
+
# # For now, we'll just print what would be done
|
| 100 |
+
# st.write(f"Applying to {name}: {op}")
|
| 101 |
|
| 102 |
+
# st.session_state.step = 3
|
| 103 |
+
# st.success("Cleaning operations applied. Proceeding to chat interface.")
|
| 104 |
+
# st.button("Go to Chat Interface")
|
| 105 |
|
| 106 |
+
# if st.button("Back to Data Upload"):
|
| 107 |
+
# st.session_state.step = 1
|
| 108 |
+
# st.experimental_rerun()
|
| 109 |
|
| 110 |
def step_3_chat_with_data():
|
| 111 |
st.subheader("Step 3: Chat with your data")
|
|
|
|
| 116 |
st.session_state.chat_history.append(("User", user_input))
|
| 117 |
st.session_state.chat_history.append(("AI", response))
|
| 118 |
|
| 119 |
+
for role, message in reversed(st.session_state.chat_history):
|
| 120 |
if role == "User":
|
| 121 |
st.text_area("You:", value=message, height=50, disabled=True)
|
| 122 |
else:
|
| 123 |
+
st.write(message)
|
| 124 |
|
| 125 |
def process_user_input(user_input):
|
| 126 |
llm = OpenAI(temperature=0)
|
|
|
|
| 136 |
combined_df,
|
| 137 |
verbose=True,
|
| 138 |
agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
|
| 139 |
+
allow_dangerous_code=True,
|
| 140 |
+
handle_parsing_errors=True
|
| 141 |
)
|
| 142 |
|
| 143 |
+
# full_input = f"{df_summary}\nThe data from all files has been combined into a single DataFrame with an additional 'source' column indicating the original file.\n\nUser question: {user_input}"
|
| 144 |
+
full_input = f"{combined_df}\nAs a data analyst, process the data to answer the user question.\n\nUser question: {user_input}"
|
| 145 |
|
| 146 |
response = agent.run(full_input)
|
| 147 |
return response
|
|
|
|
| 165 |
return f"Analysis timed out or encountered an error: {str(e)}"
|
| 166 |
|
| 167 |
if __name__ == "__main__":
|
| 168 |
+
main()
|