Spaces:

DrMostafa
/

llamaindex

Sleeping

App Files Community

DrMostafa committed on Jan 26

Commit

18d549e

verified ·

1 Parent(s): 65030b7

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +155 -122

src/streamlit_app.py CHANGED Viewed

@@ -39,15 +39,7 @@ import traceback
 import inspect
 import nest_asyncio
-# ✅ allow nested event loops
-nest_asyncio.apply()
-# ✅ explicitly create and set a running loop (Python 3.13 fix)
-try:
-    loop = asyncio.get_event_loop()
-except RuntimeError:
-    loop = asyncio.new_event_loop()
-    asyncio.set_event_loop(loop)
 from llama_index.core import Document, Settings
 from llama_index.llms.openai import OpenAI
@@ -64,157 +56,198 @@ st.set_page_config(page_title="Excel Agent with LlamaIndex", layout="wide")
 st.title("📊 Excel Data Agent (LlamaIndex)")
 st.write("Upload your Excel file to chat with all its sheets, run code, and get schema analysis.")
 # ---------------------------------------------------------
 # -- Hardcoded API KEYS --
 os.environ["OPENAI_API_KEY"] = "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"
 os.environ["LLAMA_CLOUD_API_KEY"] = "llx-tj6qAHSzvNsEsAXe6kxT5XYIclsN6s7AfYAnnlLduQutQ3Gx"
-# ---------------------------------------------------------
-# 📤 FILE UPLOAD
-# ---------------------------------------------------------
-uploaded_file = st.file_uploader("Upload your Excel file (.xlsx)", type=["xlsx"])
 if uploaded_file:
     xls = pd.ExcelFile(uploaded_file)
     sheet_names = xls.sheet_names
-    all_dfs = {sheet: xls.parse(sheet) for sheet in sheet_names}
-    st.sidebar.header("Sheets in file:")
-    for sheet, df in all_dfs.items():
-        st.sidebar.write(f"**{sheet}** ({df.shape[0]} rows, {df.shape[1]} cols)")
-    with st.expander("Preview All Sheets"):
-        for sheet, df in all_dfs.items():
-            st.subheader(sheet)
             st.dataframe(df.head(10))
-    # ---------------------------------------------------------
-    # 🧠 CREATE LlamaIndex DOCUMENTS
-    # ---------------------------------------------------------
-    docs = [
-        Document(
-            text=f"Sheet '{sheet}':\n" + df.head(100).to_csv(index=False),
-            metadata={"sheet": sheet},
-        )
-        for sheet, df in all_dfs.items()
-    ]
-    # ---------------------------------------------------------
-    # 🚀 INITIALIZE LLM + EMBEDDINGS
-    # ---------------------------------------------------------
-    llm = OpenAI(model="gpt-4o")
-    Settings.llm = llm
     Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small")
-    node_parser = SentenceSplitter()
-    agents_dict = {}
-    for doc in docs:
-        nodes = node_parser.get_nodes_from_documents([doc])
-        vector_index = VectorStoreIndex(nodes)
-        summary_index = SummaryIndex(nodes)
-        vector_engine = vector_index.as_query_engine(llm=llm)
-        summary_engine = summary_index.as_query_engine(response_mode="tree_summarize", llm=llm)
         tools = [
-            QueryEngineTool.from_defaults(query_engine=vector_engine, name=f"vector_{doc.metadata['sheet']}"),
-            QueryEngineTool.from_defaults(query_engine=summary_engine, name=f"summary_{doc.metadata['sheet']}"),
         ]
-        sheet_agent = FunctionAgent(
             tools=tools,
             llm=llm,
-            system_prompt=f"You are an agent focused on the '{doc.metadata['sheet']}' sheet of the uploaded Excel file.",
         )
-        agents_dict[doc.metadata["sheet"]] = sheet_agent
-    # ---------------------------------------------------------
-    # ⛓ Wrap per-sheet agents into tools for the top-level agent
-    # ---------------------------------------------------------
-    def get_agent_tool_callable(agent):
-        def query_agent(query: str) -> str:
-            async def runner():
-                return await agent.run(query)
-            coro = runner()
-            return loop.run_until_complete(coro)
-        return query_agent
     all_tools = []
-    for sheet, agent in agents_dict.items():
-        sync_fn = get_agent_tool_callable(agent)
-        all_tools.append(
-            FunctionTool.from_defaults(sync_fn, name=f"tool_{sheet}", description=f"Ask about '{sheet}' sheet.")
-        )
     top_agent = FunctionAgent(
         tools=all_tools,
         llm=llm,
-        system_prompt="You are an overall Excel data agent. You can access tools corresponding to each sheet.",
     )
-    # ---------------------------------------------------------
-    # 🧾 Schema analysis
-    # ---------------------------------------------------------
-    st.header("🔎 Automatic Schema Analysis")
-    schema_report = ""
-    for sheet, df in all_dfs.items():
-        schema_report += f"**Sheet:** `{sheet}`\n- Columns: {list(df.columns)}\n"
-        schema_report += f"- Sample Row: {df.iloc[0].to_dict() if not df.empty else 'Sheet empty'}\n\n"
-    relationships = []
-    for s1, df1 in all_dfs.items():
-        for s2, df2 in all_dfs.items():
-            if s1 != s2:
-                common = set(df1.columns) & set(df2.columns)
-                if common:
-                    relationships.append(f"Possible relationship between `{s1}` and `{s2}` on columns {common}")
-    if relationships:
-        schema_report += "**Inferred Relationships:**\n- " + "\n- ".join(relationships)
-    st.markdown(schema_report)
-    # ---------------------------------------------------------
-    # 💬 Ask / Run agent
-    # ---------------------------------------------------------
-    st.header("💬 Ask the Agent (about your Excel data)")
-    user_query = st.text_area("Enter a question or command (e.g. 'plot last column', 'summarize sales by region').")
     def extract_code_blocks(text):
-        pattern = r"```(?:python)?\n(.*?)\n```"
-        return re.findall(pattern, text, re.DOTALL)
-    def run_code(code, local_vars):
-        output = io.StringIO()
-        with contextlib.redirect_stdout(output), contextlib.redirect_stderr(output):
             try:
-                exec(code, {"pd": pd, "st": st, **local_vars})
             except Exception as e:
-                print("Error executing code:", e)
-        return output.getvalue()
-    if st.button("Run Agent"):
-        with st.spinner("Agent is thinking..."):
-            try:
-                async def agent_runner():
-                    return await top_agent.run(user_query)
-                response = loop.run_until_complete(agent_runner())
-                response_str = str(response)
-                st.markdown("### Agent Response:")
-                st.markdown(response_str)
-                code_blocks = extract_code_blocks(response_str)
-                if code_blocks:
-                    st.markdown("#### Executing Code Blocks:")
-                for idx, code in enumerate(code_blocks):
                     st.code(code, language="python")
-                    output = run_code(code, {"all_dfs": all_dfs})
-                    if output:
-                        st.text_area(f"Output of code block {idx+1}:", output, height=150)
-            except Exception:
-                st.error("Error during agent execution:\n" + traceback.format_exc())
 else:
-    st.info("Please upload an Excel file to get started.")

 import inspect
 import nest_asyncio
 from llama_index.core import Document, Settings
 from llama_index.llms.openai import OpenAI
 st.title("📊 Excel Data Agent (LlamaIndex)")
 st.write("Upload your Excel file to chat with all its sheets, run code, and get schema analysis.")
+import streamlit as st
+import pandas as pd
+import io
+import os
+import asyncio
+import traceback
+import contextlib
+import matplotlib.pyplot as plt
+import nest_asyncio
+import inspect
+import re
+# ✅ Asyncio + Streamlit compatibility for Python 3.13
+nest_asyncio.apply()
+try:
+    loop = asyncio.get_event_loop()
+except RuntimeError:
+    loop = asyncio.new_event_loop()
+    asyncio.set_event_loop(loop)
+# -----------------------------------------------------
+# 🔧 LlamaIndex + OpenAI
+# -----------------------------------------------------
+from llama_index.core import Document, Settings, VectorStoreIndex, SummaryIndex
+from llama_index.core.llms import ChatMessage
+from llama_index.embeddings.openai import OpenAIEmbedding
+from llama_index.llms.openai import OpenAI
+from llama_index.core.agent.workflow import FunctionAgent
+from llama_index.core.tools import QueryEngineTool, FunctionTool
+from llama_index.core.node_parser import SentenceSplitter
+# -----------------------------------------------------
+# 🌐 Setup basic configs
+# -----------------------------------------------------
+st.set_page_config(page_title="Excel AI Analyst", layout="wide")
+st.title("📘 Excel AI Analyst – Chat, Code, Analyze & Plot")
 # ---------------------------------------------------------
 # -- Hardcoded API KEYS --
 os.environ["OPENAI_API_KEY"] = "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"
 os.environ["LLAMA_CLOUD_API_KEY"] = "llx-tj6qAHSzvNsEsAXe6kxT5XYIclsN6s7AfYAnnlLduQutQ3Gx"
+# -----------------------------------------------------
+# 📂 File upload
+# -----------------------------------------------------
+uploaded_file = st.file_uploader("Upload Excel (.xlsx)", type=["xlsx"])
+# Maintain conversation state
+if "chat_history" not in st.session_state:
+    st.session_state.chat_history = []
+if "top_agent" not in st.session_state:
+    st.session_state.top_agent = None
+# -----------------------------------------------------
+# 🧠 Build Agents after upload
+# -----------------------------------------------------
 if uploaded_file:
     xls = pd.ExcelFile(uploaded_file)
     sheet_names = xls.sheet_names
+    all_dfs = {s: xls.parse(s) for s in sheet_names}
+    # Sidebar info
+    st.sidebar.header("Sheets Info")
+    for s, df in all_dfs.items():
+        st.sidebar.write(f"**{s}** - {df.shape[0]}×{df.shape[1]}")
+    # Preview
+    with st.expander("📄 Preview Sheets"):
+        for s, df in all_dfs.items():
+            st.subheader(s)
             st.dataframe(df.head(10))
+    # -------------------------------------------------
+    # Create LlamaIndex agents per-sheet
+    # -------------------------------------------------
     Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small")
+    llm = OpenAI(model="gpt-4o-mini", temperature=0.4)
+    splitter = SentenceSplitter()
+    sheet_agents = {}
+    for name, df in all_dfs.items():
+        doc = Document(text=f"Excel sheet {name}:\n{df.head(100).to_csv(index=False)}", metadata={"sheet": name})
+        nodes = splitter.get_nodes_from_documents([doc])
+        vector_idx = VectorStoreIndex(nodes)
+        summary_idx = SummaryIndex(nodes)
         tools = [
+            QueryEngineTool.from_defaults(query_engine=vector_idx.as_query_engine(llm=llm), name=f"vector_{name}"),
+            QueryEngineTool.from_defaults(
+                query_engine=summary_idx.as_query_engine(response_mode="tree_summarize", llm=llm),
+                name=f"summary_{name}")
         ]
+        agent = FunctionAgent(
             tools=tools,
             llm=llm,
+            system_prompt=f"You are a data analysis assistant specialized in the Excel sheet '{name}'."
         )
+        sheet_agents[name] = agent
     all_tools = []
+    for sname, agent in sheet_agents.items():
+        def make_callable(agent_ref):
+            def call(query: str) -> str:
+                async def run_agent():
+                    return await agent_ref.run(query)
+                return loop.run_until_complete(run_agent())
+            return call
+        fn = make_callable(agent)
+        all_tools.append(FunctionTool.from_defaults(fn, name=f"Sheet_{sname}", description=f"Analyze sheet {sname}."))
     top_agent = FunctionAgent(
         tools=all_tools,
         llm=llm,
+        system_prompt="You are a top-level Excel analysis assistant. Use sheet tools or generate Python code to analyze data."
     )
+    st.session_state.top_agent = top_agent
+    # -------------------------------------------------
+    # Schema summary
+    # -------------------------------------------------
+    st.subheader("🧩 Schema Summary")
+    for s, df in all_dfs.items():
+        st.markdown(f"**{s}** — {df.shape[0]} rows × {df.shape[1]} cols")
+        st.write(list(df.columns))
+    # -------------------------------------------------
+    # Conversational interface
+    # -------------------------------------------------
+    st.subheader("💬 Chat with Excel Agent")
+    user_query = st.chat_input("Ask or instruct (e.g. 'plot last column', 'compare sales by region')")
     def extract_code_blocks(text):
+        return re.findall(r"```(?:python)?\n(.*?)```", text, re.DOTALL)
+    def run_user_code(code, context_vars):
+        string_out = io.StringIO()
+        with contextlib.redirect_stdout(string_out):
             try:
+                exec(code, {"pd": pd, "plt": plt, "st": st, **context_vars})
             except Exception as e:
+                print(f"Error: {e}")
+        return string_out.getvalue()
+    async def stream_response(agent, query):
+        # Basic token streaming using chunked yield
+        yield "🧠 Thinking...\n\n"
+        resp = await agent.run(query)
+        yield str(resp)
+    if user_query:
+        st.session_state.chat_history.append(ChatMessage(role="user", content=user_query))
+        with st.chat_message("user"):
+            st.markdown(user_query)
+        with st.chat_message("assistant"):
+            message_placeholder = st.empty()
+            full_resp = ""
+            async def gather():
+                async for part in stream_response(st.session_state.top_agent, user_query):
+                    nonlocal full_resp
+                    full_resp += part
+                    message_placeholder.markdown(full_resp)
+                return full_resp
+            resp_text = loop.run_until_complete(gather())
+            # Store in chat history
+            st.session_state.chat_history.append(ChatMessage(role="assistant", content=resp_text))
+            # Detect and run any code
+            code_blocks = extract_code_blocks(resp_text)
+            if code_blocks:
+                st.markdown("#### 🧩 Code Detected — Running:")
+                for i, code in enumerate(code_blocks):
                     st.code(code, language="python")
+                    output = run_user_code(code, {"all_dfs": all_dfs})
+                    if output.strip():
+                        st.text_area(f"Output {i+1}:", output, height=150)
+    # Display past chat history
+    if st.session_state.chat_history:
+        st.divider()
+        st.subheader("🪶 Conversation History")
+        for msg in st.session_state.chat_history:
+            role = "🧍 User" if msg.role == "user" else "🤖 Agent"
+            st.markdown(f"**{role}:** {msg.content}")
 else:
+    st.info("Upload an Excel file to get started 📤.")