DrMostafa committed on
Commit
18d549e
·
verified ·
1 Parent(s): 65030b7

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +155 -122
src/streamlit_app.py CHANGED
@@ -39,15 +39,7 @@ import traceback
39
  import inspect
40
  import nest_asyncio
41
 
42
- # βœ… allow nested event loops
43
- nest_asyncio.apply()
44
 
45
- # βœ… explicitly create and set a running loop (Python 3.13 fix)
46
- try:
47
- loop = asyncio.get_event_loop()
48
- except RuntimeError:
49
- loop = asyncio.new_event_loop()
50
- asyncio.set_event_loop(loop)
51
 
52
  from llama_index.core import Document, Settings
53
  from llama_index.llms.openai import OpenAI
@@ -64,157 +56,198 @@ st.set_page_config(page_title="Excel Agent with LlamaIndex", layout="wide")
64
  st.title("πŸ“Š Excel Data Agent (LlamaIndex)")
65
  st.write("Upload your Excel file to chat with all its sheets, run code, and get schema analysis.")
66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  # ---------------------------------------------------------
68
  # -- Hardcoded API KEYS --
69
  os.environ["OPENAI_API_KEY"] = "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"
70
  os.environ["LLAMA_CLOUD_API_KEY"] = "llx-tj6qAHSzvNsEsAXe6kxT5XYIclsN6s7AfYAnnlLduQutQ3Gx"
71
 
 
 
 
 
72
 
73
- # ---------------------------------------------------------
74
- # πŸ“€ FILE UPLOAD
75
- # ---------------------------------------------------------
76
- uploaded_file = st.file_uploader("Upload your Excel file (.xlsx)", type=["xlsx"])
 
 
77
 
 
 
 
78
  if uploaded_file:
79
  xls = pd.ExcelFile(uploaded_file)
80
  sheet_names = xls.sheet_names
81
- all_dfs = {sheet: xls.parse(sheet) for sheet in sheet_names}
82
 
83
- st.sidebar.header("Sheets in file:")
84
- for sheet, df in all_dfs.items():
85
- st.sidebar.write(f"**{sheet}** ({df.shape[0]} rows, {df.shape[1]} cols)")
 
86
 
87
- with st.expander("Preview All Sheets"):
88
- for sheet, df in all_dfs.items():
89
- st.subheader(sheet)
 
90
  st.dataframe(df.head(10))
91
 
92
- # ---------------------------------------------------------
93
- # 🧠 CREATE LlamaIndex DOCUMENTS
94
- # ---------------------------------------------------------
95
- docs = [
96
- Document(
97
- text=f"Sheet '{sheet}':\n" + df.head(100).to_csv(index=False),
98
- metadata={"sheet": sheet},
99
- )
100
- for sheet, df in all_dfs.items()
101
- ]
102
-
103
- # ---------------------------------------------------------
104
- # πŸš€ INITIALIZE LLM + EMBEDDINGS
105
- # ---------------------------------------------------------
106
- llm = OpenAI(model="gpt-4o")
107
- Settings.llm = llm
108
  Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small")
 
109
 
110
- node_parser = SentenceSplitter()
111
- agents_dict = {}
112
 
113
- for doc in docs:
114
- nodes = node_parser.get_nodes_from_documents([doc])
115
- vector_index = VectorStoreIndex(nodes)
116
- summary_index = SummaryIndex(nodes)
117
- vector_engine = vector_index.as_query_engine(llm=llm)
118
- summary_engine = summary_index.as_query_engine(response_mode="tree_summarize", llm=llm)
119
 
120
  tools = [
121
- QueryEngineTool.from_defaults(query_engine=vector_engine, name=f"vector_{doc.metadata['sheet']}"),
122
- QueryEngineTool.from_defaults(query_engine=summary_engine, name=f"summary_{doc.metadata['sheet']}"),
 
 
123
  ]
124
 
125
- sheet_agent = FunctionAgent(
126
  tools=tools,
127
  llm=llm,
128
- system_prompt=f"You are an agent focused on the '{doc.metadata['sheet']}' sheet of the uploaded Excel file.",
129
  )
130
- agents_dict[doc.metadata["sheet"]] = sheet_agent
131
-
132
- # ---------------------------------------------------------
133
- # β›“ Wrap per-sheet agents into tools for the top-level agent
134
- # ---------------------------------------------------------
135
- def get_agent_tool_callable(agent):
136
- def query_agent(query: str) -> str:
137
- async def runner():
138
- return await agent.run(query)
139
-
140
- coro = runner()
141
- return loop.run_until_complete(coro)
142
-
143
- return query_agent
144
 
145
  all_tools = []
146
- for sheet, agent in agents_dict.items():
147
- sync_fn = get_agent_tool_callable(agent)
148
- all_tools.append(
149
- FunctionTool.from_defaults(sync_fn, name=f"tool_{sheet}", description=f"Ask about '{sheet}' sheet.")
150
- )
 
 
 
 
151
 
152
  top_agent = FunctionAgent(
153
  tools=all_tools,
154
  llm=llm,
155
- system_prompt="You are an overall Excel data agent. You can access tools corresponding to each sheet.",
156
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
 
158
- # ---------------------------------------------------------
159
- # 🧾 Schema analysis
160
- # ---------------------------------------------------------
161
- st.header("πŸ”Ž Automatic Schema Analysis")
162
- schema_report = ""
163
- for sheet, df in all_dfs.items():
164
- schema_report += f"**Sheet:** `{sheet}`\n- Columns: {list(df.columns)}\n"
165
- schema_report += f"- Sample Row: {df.iloc[0].to_dict() if not df.empty else 'Sheet empty'}\n\n"
166
-
167
- relationships = []
168
- for s1, df1 in all_dfs.items():
169
- for s2, df2 in all_dfs.items():
170
- if s1 != s2:
171
- common = set(df1.columns) & set(df2.columns)
172
- if common:
173
- relationships.append(f"Possible relationship between `{s1}` and `{s2}` on columns {common}")
174
- if relationships:
175
- schema_report += "**Inferred Relationships:**\n- " + "\n- ".join(relationships)
176
- st.markdown(schema_report)
177
-
178
- # ---------------------------------------------------------
179
- # πŸ’¬ Ask / Run agent
180
- # ---------------------------------------------------------
181
- st.header("πŸ’¬ Ask the Agent (about your Excel data)")
182
- user_query = st.text_area("Enter a question or command (e.g. 'plot last column', 'summarize sales by region').")
183
 
184
  def extract_code_blocks(text):
185
- pattern = r"```(?:python)?\n(.*?)\n```"
186
- return re.findall(pattern, text, re.DOTALL)
187
 
188
- def run_code(code, local_vars):
189
- output = io.StringIO()
190
- with contextlib.redirect_stdout(output), contextlib.redirect_stderr(output):
191
  try:
192
- exec(code, {"pd": pd, "st": st, **local_vars})
193
  except Exception as e:
194
- print("Error executing code:", e)
195
- return output.getvalue()
196
-
197
- if st.button("Run Agent"):
198
- with st.spinner("Agent is thinking..."):
199
- try:
200
- async def agent_runner():
201
- return await top_agent.run(user_query)
202
-
203
- response = loop.run_until_complete(agent_runner())
204
- response_str = str(response)
205
- st.markdown("### Agent Response:")
206
- st.markdown(response_str)
207
-
208
- code_blocks = extract_code_blocks(response_str)
209
- if code_blocks:
210
- st.markdown("#### Executing Code Blocks:")
211
- for idx, code in enumerate(code_blocks):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
212
  st.code(code, language="python")
213
- output = run_code(code, {"all_dfs": all_dfs})
214
- if output:
215
- st.text_area(f"Output of code block {idx+1}:", output, height=150)
216
- except Exception:
217
- st.error("Error during agent execution:\n" + traceback.format_exc())
 
 
 
 
 
 
218
 
219
  else:
220
- st.info("Please upload an Excel file to get started.")
 
39
  import inspect
40
  import nest_asyncio
41
 
 
 
42
 
 
 
 
 
 
 
43
 
44
  from llama_index.core import Document, Settings
45
  from llama_index.llms.openai import OpenAI
 
56
  st.title("πŸ“Š Excel Data Agent (LlamaIndex)")
57
  st.write("Upload your Excel file to chat with all its sheets, run code, and get schema analysis.")
58
 
59
# -----------------------------------------------------
# 📦 Imports + asyncio/Streamlit compatibility setup
# -----------------------------------------------------
import streamlit as st
import pandas as pd
import io
import os
import asyncio
import traceback
import contextlib
import matplotlib.pyplot as plt
import nest_asyncio
import inspect
import re

# ✅ Asyncio + Streamlit compatibility for Python 3.13:
# nest_asyncio lets loop.run_until_complete() be called from Streamlit's
# script thread even when an event loop is already running.
nest_asyncio.apply()
try:
    loop = asyncio.get_event_loop()
except RuntimeError:
    # Python 3.10+ raises RuntimeError when no loop exists on this thread.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)

# -----------------------------------------------------
# 🔧 LlamaIndex + OpenAI
# -----------------------------------------------------
from llama_index.core import Document, Settings, VectorStoreIndex, SummaryIndex
from llama_index.core.llms import ChatMessage
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
from llama_index.core.agent.workflow import FunctionAgent
from llama_index.core.tools import QueryEngineTool, FunctionTool
from llama_index.core.node_parser import SentenceSplitter

# -----------------------------------------------------
# 🌐 Basic page config
# -----------------------------------------------------
# FIX: an earlier section of this file already calls st.set_page_config()
# ("Excel Agent with LlamaIndex"); Streamlit permits only ONE call per app
# run and raises StreamlitAPIException on the second.  Guard the duplicate
# so the page still renders instead of crashing.
try:
    st.set_page_config(page_title="Excel AI Analyst", layout="wide")
except Exception:
    pass  # already configured earlier in the script
st.title("📘 Excel AI Analyst – Chat, Code, Analyze & Plot")
97
+
98
# ---------------------------------------------------------
# 🔐 API keys
# ---------------------------------------------------------
# SECURITY FIX: the previous revision hardcoded live OpenAI / LlamaCloud
# API keys directly in source control.  Any key that was ever committed is
# compromised and MUST be rotated.  Keys are now resolved from the
# environment first, then from Streamlit secrets — never embedded in code.
for _key in ("OPENAI_API_KEY", "LLAMA_CLOUD_API_KEY"):
    if not os.environ.get(_key):
        try:
            os.environ[_key] = st.secrets[_key]
        except Exception:
            # No secrets file or key missing — OPENAI_API_KEY is checked below.
            pass

if not os.environ.get("OPENAI_API_KEY"):
    st.error(
        "OPENAI_API_KEY is not configured. "
        "Set it as an environment variable or in Streamlit secrets."
    )
    st.stop()

# -----------------------------------------------------
# 📂 File upload
# -----------------------------------------------------
uploaded_file = st.file_uploader("Upload Excel (.xlsx)", type=["xlsx"])
107
 
108
# Initialise per-session state used by the chat UI: the running
# conversation transcript and the lazily-built top-level agent.
for _name, _default in (("chat_history", []), ("top_agent", None)):
    if _name not in st.session_state:
        st.session_state[_name] = _default
114
 
115
# -----------------------------------------------------
# 🧠 Build agents after upload
# -----------------------------------------------------
if uploaded_file:
    xls = pd.ExcelFile(uploaded_file)
    sheet_names = xls.sheet_names
    all_dfs = {s: xls.parse(s) for s in sheet_names}

    # Sidebar: one line per sheet with its dimensions.
    st.sidebar.header("Sheets Info")
    for s, df in all_dfs.items():
        st.sidebar.write(f"**{s}** - {df.shape[0]}×{df.shape[1]}")

    # Quick preview of the first rows of every sheet.
    with st.expander("📄 Preview Sheets"):
        for s, df in all_dfs.items():
            st.subheader(s)
            st.dataframe(df.head(10))

    # -------------------------------------------------
    # Create LlamaIndex agents per sheet: each sheet gets a vector index
    # (semantic lookup) and a summary index (tree summarisation), wrapped
    # in a FunctionAgent scoped to that sheet.
    # -------------------------------------------------
    Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small")
    llm = OpenAI(model="gpt-4o-mini", temperature=0.4)

    splitter = SentenceSplitter()
    sheet_agents = {}

    for name, df in all_dfs.items():
        # Only the first 100 rows are embedded — the indices see a sample,
        # not the full sheet; full data stays available via all_dfs.
        doc = Document(
            text=f"Excel sheet {name}:\n{df.head(100).to_csv(index=False)}",
            metadata={"sheet": name},
        )
        nodes = splitter.get_nodes_from_documents([doc])
        vector_idx = VectorStoreIndex(nodes)
        summary_idx = SummaryIndex(nodes)

        tools = [
            QueryEngineTool.from_defaults(
                query_engine=vector_idx.as_query_engine(llm=llm),
                name=f"vector_{name}",
            ),
            QueryEngineTool.from_defaults(
                query_engine=summary_idx.as_query_engine(
                    response_mode="tree_summarize", llm=llm
                ),
                name=f"summary_{name}",
            ),
        ]

        agent = FunctionAgent(
            tools=tools,
            llm=llm,
            system_prompt=f"You are a data analysis assistant specialized in the Excel sheet '{name}'.",
        )
        sheet_agents[name] = agent

    # Wrap each per-sheet agent as a sync FunctionTool for the top agent.
    all_tools = []
    for sname, agent in sheet_agents.items():
        def make_callable(agent_ref):
            # Factory binds agent_ref NOW, avoiding the late-binding-closure
            # pitfall where every tool would call the last agent in the loop.
            def call(query: str) -> str:
                async def run_agent():
                    return await agent_ref.run(query)
                return loop.run_until_complete(run_agent())
            return call

        all_tools.append(
            FunctionTool.from_defaults(
                make_callable(agent),
                name=f"Sheet_{sname}",
                description=f"Analyze sheet {sname}.",
            )
        )

    top_agent = FunctionAgent(
        tools=all_tools,
        llm=llm,
        system_prompt="You are a top-level Excel analysis assistant. Use sheet tools or generate Python code to analyze data.",
    )
    st.session_state.top_agent = top_agent

    # -------------------------------------------------
    # Schema summary
    # -------------------------------------------------
    st.subheader("🧩 Schema Summary")
    for s, df in all_dfs.items():
        st.markdown(f"**{s}** — {df.shape[0]} rows × {df.shape[1]} cols")
        st.write(list(df.columns))

    # -------------------------------------------------
    # Conversational interface
    # -------------------------------------------------
    st.subheader("💬 Chat with Excel Agent")

    user_query = st.chat_input("Ask or instruct (e.g. 'plot last column', 'compare sales by region')")

    def extract_code_blocks(text):
        """Return the contents of every ``` / ```python fenced block in *text*."""
        return re.findall(r"```(?:python)?\n(.*?)```", text, re.DOTALL)

    def run_user_code(code, context_vars):
        """Execute agent-generated code, capturing stdout; exceptions are
        printed into the captured output rather than raised.

        SECURITY: exec() of model-generated code is arbitrary code execution
        in the server process.  Acceptable only for a trusted single-user
        demo; do not expose this app to untrusted users as-is.
        """
        string_out = io.StringIO()
        with contextlib.redirect_stdout(string_out):
            try:
                exec(code, {"pd": pd, "plt": plt, "st": st, **context_vars})
            except Exception as e:
                print(f"Error: {e}")
        return string_out.getvalue()

    async def stream_response(agent, query):
        # Basic two-chunk streaming: a placeholder, then the full answer.
        yield "🧠 Thinking...\n\n"
        resp = await agent.run(query)
        yield str(resp)

    if user_query:
        st.session_state.chat_history.append(ChatMessage(role="user", content=user_query))
        with st.chat_message("user"):
            st.markdown(user_query)

        with st.chat_message("assistant"):
            message_placeholder = st.empty()

            async def gather():
                # FIX: the previous revision used `nonlocal full_resp` here,
                # which is a SyntaxError — gather() is defined at module
                # scope, so there is no enclosing function binding for
                # `nonlocal` to target.  Accumulate in a local list instead.
                parts = []
                async for part in stream_response(st.session_state.top_agent, user_query):
                    parts.append(part)
                    message_placeholder.markdown("".join(parts))
                return "".join(parts)

            resp_text = loop.run_until_complete(gather())

        # Store the assistant turn in chat history.
        st.session_state.chat_history.append(ChatMessage(role="assistant", content=resp_text))

        # Detect and run any code the agent emitted.
        code_blocks = extract_code_blocks(resp_text)
        if code_blocks:
            st.markdown("#### 🧩 Code Detected — Running:")
            for i, code in enumerate(code_blocks):
                st.code(code, language="python")
                output = run_user_code(code, {"all_dfs": all_dfs})
                if output.strip():
                    st.text_area(f"Output {i+1}:", output, height=150)

    # Display past chat history below the live conversation.
    if st.session_state.chat_history:
        st.divider()
        st.subheader("🪶 Conversation History")
        for msg in st.session_state.chat_history:
            role = "🧍 User" if msg.role == "user" else "🤖 Agent"
            st.markdown(f"**{role}:** {msg.content}")

else:
    st.info("Upload an Excel file to get started 📤.")