sanjaystarc committed on
Commit
0b69e41
·
verified ·
1 Parent(s): fa75f31

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +141 -31
app.py CHANGED
@@ -3,56 +3,166 @@ import streamlit as st
3
  import pandas as pd
4
  import numpy as np
5
  import requests
 
 
6
 
7
  # --- CONFIG ---
 
8
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
 
9
  if not GEMINI_API_KEY:
10
  st.error("❌ Missing Gemini API key. Add it as a secret: GEMINI_API_KEY")
11
  st.stop()
12
 
 
13
  GEMINI_BASE = "https://generativelanguage.googleapis.com/v1beta"
14
- CHAT_MODEL = "models/gemini-2.5-flash-lite"
15
- EMBED_MODEL = "models/embedding-001"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
  # --- Helper Functions ---
18
- def get_embedding(text):
19
- url = f"{GEMINI_BASE}/{EMBED_MODEL}:embedText?key={GEMINI_API_KEY}"
20
- data = {"text": text}
21
- r = requests.post(url, json=data)
22
- r.raise_for_status()
23
- return r.json()["embedding"]["value"]
24
-
25
- def chat_with_gemini(prompt, context=""):
26
  url = f"{GEMINI_BASE}/{CHAT_MODEL}:generateContent?key={GEMINI_API_KEY}"
 
 
 
 
27
  payload = {
28
  "contents": [
29
- {"parts": [{"text": f"{context}\n\nUser question: {prompt}"}]}
30
- ]
 
 
 
 
 
31
  }
32
- r = requests.post(url, json=payload)
33
- r.raise_for_status()
34
- data = r.json()
35
- return data["candidates"][0]["content"]["parts"][0]["text"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
  # --- UI ---
38
- st.title("📊 Data Analyst Agent (Gemini + Streamlit)")
39
- st.write("Upload a CSV file and ask natural language questions about your data.")
 
 
 
 
40
 
41
  uploaded = st.file_uploader("Upload CSV", type=["csv"])
42
 
43
  if uploaded:
44
- df = pd.read_csv(uploaded)
45
- st.dataframe(df.head())
46
-
47
- question = st.text_input("Ask a question about your data:")
48
- if st.button("Analyze") and question:
49
- # Summarize dataset for context
50
- summary = f"Columns: {', '.join(df.columns)}. Example rows:\n{df.head(3).to_string(index=False)}"
51
  try:
52
- response = chat_with_gemini(question, summary)
53
- st.markdown("### 💬 Gemini Answer:")
54
- st.write(response)
55
  except Exception as e:
56
- st.error(f"Error: {e}")
57
- else:
58
- st.info("👆 Upload a CSV file to begin.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  import pandas as pd
4
  import numpy as np
5
  import requests
6
+ import json
7
+ import time # Ensure time is imported for backoff
8
 
9
  # --- CONFIG ---
10
# The API key is read from the environment (e.g. a deployment secret) so it
# never has to be committed alongside the code.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")

# Without a key no request can succeed, so halt the script run right away.
if not GEMINI_API_KEY:
    st.error("❌ Missing Gemini API key. Add it as a secret: GEMINI_API_KEY")
    st.stop()

# API endpoint and model resource names.
GEMINI_BASE = "https://generativelanguage.googleapis.com/v1beta"
# The model is spliced into the URL as "{GEMINI_BASE}/{CHAT_MODEL}:generateContent",
# so it must be a full resource name including the "models/" prefix.
CHAT_MODEL = "models/gemini-2.5-flash-preview-09-2025"
EMBED_MODEL = "models/embedding-001"

# Response schema sent as generationConfig.responseSchema: the model must
# answer with one JSON object holding an explanation and (optionally empty) code.
ANALYSIS_SCHEMA = {
    "type": "OBJECT",
    "properties": {
        "reasoning": {
            "type": "STRING",
            "description": "A detailed natural language explanation of the analysis, including key findings and context.",
        },
        "code": {
            "type": "STRING",
            "description": "The complete, runnable Python code using pandas (df) and streamlit (st). Use st.pyplot() for plots, and st.dataframe() for resulting DataFrames. If no code is needed, this should be an empty string.",
        },
    },
    # Both keys are read by the UI; requiring them keeps the reply shape stable.
    "required": ["reasoning", "code"],
}

# System prompt describing the agent's role and the contract for generated code.
SYSTEM_INSTRUCTION = (
    "You are a world-class Data Analyst Agent. Your task is to analyze the provided DataFrame ('df') "
    "based on the user's question. You MUST respond with a single JSON object conforming to the provided schema. "
    "1. **Reasoning:** Explain your plan, the steps taken, and the insights derived from the data. Format this in Markdown. "
    "2. **Code:** If the question requires calculation, aggregation, or visualization, you MUST generate Python code to execute against the 'df' DataFrame. "
    " - The DataFrame is already loaded as a variable named 'df'. Do NOT redefine it. "
    " - Use Streamlit functions for output: `st.dataframe(...)` for results, `st.bar_chart()`, `st.line_chart()`, or `st.pyplot()` for plots. "
    " - Use `import matplotlib.pyplot as plt` if creating custom plots. "
    " - Ensure the code is self-contained and ready to execute."
)
47
 
48
  # --- Helper Functions ---
49
+
50
+ # Function to chat with the Gemini API and enforce structured JSON output
51
+ def chat_with_gemini(prompt, context):
52
+ """Sends a prompt and data context to the Gemini model for structured analysis (reasoning + code)."""
53
+
 
 
 
54
  url = f"{GEMINI_BASE}/{CHAT_MODEL}:generateContent?key={GEMINI_API_KEY}"
55
+
56
+ # Construct the full prompt including the data context
57
+ full_prompt = f"Data Context (DataFrame Head and Columns):\n{context}\n\nUser Question: {prompt}"
58
+
59
  payload = {
60
  "contents": [
61
+ {"parts": [{"text": full_prompt}]}
62
+ ],
63
+ "systemInstruction": {"parts": [{"text": SYSTEM_INSTRUCTION}]},
64
+ "generationConfig": {
65
+ "responseMimeType": "application/json",
66
+ "responseSchema": ANALYSIS_SCHEMA
67
+ }
68
  }
69
+
70
+ max_retries = 5
71
+ delay = 1
72
+ for attempt in range(max_retries):
73
+ try:
74
+ r = requests.post(url, headers={'Content-Type': 'application/json'}, data=json.dumps(payload))
75
+ r.raise_for_status()
76
+ data = r.json()
77
+
78
+ # The JSON output is a string inside the 'text' part
79
+ json_str = data["candidates"][0]["content"]["parts"][0]["text"]
80
+ return json.loads(json_str)
81
+
82
+ except requests.exceptions.RequestException as e:
83
+ if attempt < max_retries - 1:
84
+ time.sleep(delay)
85
+ delay *= 2
86
+ else:
87
+ st.error(f"API Request Failed: {e}")
88
+ raise e
89
+ except Exception as e:
90
+ st.error(f"Failed to parse model response or execute operation: {e}")
91
+ raise e
92
 
93
  # --- UI ---
94
st.title("✨ Perfect Data Analyst Agent (Code Execution Enabled)")
st.write("Upload a CSV file and ask natural language questions. The agent now generates and executes Python code to provide precise data analysis and visualizations.")

# Keep the DataFrame in session state so it is initialised exactly once.
if 'df' not in st.session_state:
    st.session_state.df = pd.DataFrame()

uploaded = st.file_uploader("Upload CSV", type=["csv"])

if uploaded:
    # st.cache_data avoids re-parsing the same upload on every script rerun.
    @st.cache_data
    def load_data(file):
        """Parse the uploaded CSV; show an error and return an empty frame on failure."""
        try:
            return pd.read_csv(file)
        except Exception as e:
            st.error(f"Failed to load CSV: {e}")
            return pd.DataFrame()

    st.session_state.df = load_data(uploaded)

    if not st.session_state.df.empty:
        st.subheader("Data Preview (First 5 Rows)")
        st.dataframe(st.session_state.df.head())

        question = st.text_area("Ask a complex question or request a visualization (e.g., 'Show the average of the 'Sales' column', 'Plot the distribution of 'Age'):")

        if st.button("Analyze & Execute") and question:
            df = st.session_state.df  # Local variable for code execution context

            # Summarize dataset for context sent to the LLM.
            sample = df.head(5)
            try:
                sample_text = sample.to_markdown(index=False)
            except ImportError:
                # to_markdown relies on the optional 'tabulate' package; fall
                # back to plain text instead of crashing the app.
                sample_text = sample.to_string(index=False)
            context = f"Dataset Columns: {', '.join(df.columns.astype(str))}\n\nFirst 5 rows of data:\n{sample_text}"

            st.markdown("---")
            st.subheader("🤖 Analysis Steps")

            code = ""
            with st.spinner("1. Generating analysis plan and code..."):
                try:
                    # 1. Get structured response from LLM
                    analysis_result = chat_with_gemini(question, context)

                    reasoning = analysis_result.get('reasoning', "No reasoning provided.")
                    code = analysis_result.get('code', "")

                    st.markdown("#### 💬 Reasoning:")
                    st.markdown(reasoning)

                    st.markdown("#### 🐍 Generated Code:")
                    st.code(code, language='python')

                except Exception as e:
                    st.error(f"Step 1 Failed (LLM Interaction): {e}")
                    code = ""

            if code:
                with st.spinner("2. Executing code and generating output..."):
                    try:
                        # 2. Execute the generated Python code.
                        #
                        # SECURITY NOTE(review): exec() runs model-generated
                        # code with the full privileges of this process. It is
                        # NOT sandboxed; only deploy where that is acceptable.
                        #
                        # A single namespace is used on purpose. With separate
                        # globals/locals the code runs as if inside a class
                        # body, so functions and comprehensions it defines
                        # cannot see 'df', 'pd' or its own imports. A fresh
                        # dict also keeps app globals (e.g. the API key) out of
                        # the generated code's namespace.
                        exec_scope = {
                            'df': df,
                            'st': st,
                            'pd': pd,
                            'np': np,
                        }
                        exec(code, exec_scope)

                        st.success("Code execution complete. Results are displayed above.")

                    except Exception as e:
                        st.error("Step 2 Failed (Code Execution Error): The agent generated invalid code.")
                        st.exception(e)