Spaces:

jsds003
/

AnalyticsAgent

Paused

App Files Files Community

jsds003 committed on Jul 3

Commit

2728d04

1 Parent(s): 27a10b6

Removed all content to test deployment

Browse files

Files changed (1) hide show

src/streamlit_app.py +0 -292

src/streamlit_app.py CHANGED Viewed

@@ -5,304 +5,12 @@ from pygwalker.api.streamlit import StreamlitRenderer
 import re
 from typing import List, Any
@st.cache_resource
def getPipeline():
    """Create the text-generation pipeline shared by the agent functions.

    Decorated with ``st.cache_resource`` so Streamlit can hand back the same
    pipeline object instead of rebuilding it on every call.
    """
    task_name = "text-generation"
    checkpoint = "nvidia/Llama-3.1-Nemotron-Nano-4B-v1.1"
    return pipeline(task_name, model=checkpoint)
@st.cache_resource
def get_pyg_renderer(df: pd.DataFrame):
    """Build the PyGWalker renderer for ``df``.

    Args:
        df: The DataFrame to explore. It is also the argument Streamlit uses
            to key the cached renderer.

    Returns:
        A ``StreamlitRenderer`` wrapping ``df``.
    """
    # Build from the argument itself: reading st.session_state.df here would
    # let the cache key (df) and the rendered data drift apart.
    return StreamlitRenderer(df)
# Module-level handle to the text-generation pipeline; the agent functions
# in this file call it directly.
pipe = getPipeline()
def FileSummaryHelper(df: pd.DataFrame) -> str:
    """Build a plain-text profile of ``df``.

    The profile lists, for every column, its dtype and the percentage of
    missing values, followed by the total row count and the number of rows
    flagged by ``df.duplicated(keep=False)`` (every member of a duplicate
    group is counted, not just the repeats).
    """
    per_column = [
        f"'{name}' | Data Type: {df[name].dtype} | Missing Percentage: {df[name].isna().mean()*100:.2f}%"
        for name in df
    ]
    column_block = "\n".join(per_column)
    duplicate_count = df.duplicated(keep=False).sum()
    row_count = len(df)
    return f"""
    The columns of the data have the following datatypes and missing value percentages:
    {column_block}
    The dataset has {row_count} total rows.
    The dataset has {duplicate_count} duplicated rows.
    """
def FileDescriptionAgent(userDesc:str, df: pd.DataFrame) -> str:
    """Ask the model for a short qualitative description of the dataset.

    Args:
        userDesc: Optional description supplied by the user; when non-empty
            it is added to the prompt.
        df: The dataset to describe.

    Returns:
        The text generated by the model.
    """
    if userDesc:
        userDesc = "I have described the dataset as follows: " + userDesc
    else:
        userDesc = ""
    fileSummary = FileSummaryHelper(df)
    column_names = ', '.join(df.columns.tolist())
    prompt = f""" You are given a DataFrame `df` with columns: {column_names}
    {fileSummary}
    {userDesc}
    Qualitatively describe the dataset in 2-3 concise sentences. Your response must only include the description with no explanations before or after."""
    system_turn = {
        "role": "system",
        "content": "detailed thinking off. You are an insightful Data Analyst.",
    }
    user_turn = {"role": "user", "content": prompt}
    generations = pipe(
        [system_turn, user_turn],
        temperature=0.2,
        max_new_tokens=1024,
        return_full_text=False,
    )
    return generations[0]['generated_text']
def AnlaysisQuestionAgent(summary:str):
    """Ask the model for three analytical questions about the dataset.

    Args:
        summary: Text description of the dataset, sent as the user message.

    Returns:
        A list of question strings parsed from the model's numbered list.
        Blank entries are dropped. The list can hold fewer or more than three
        items when the model does not follow the requested format.
    """
    messages = [
        {"role": "system", "content": \
            """detailed thinking off. You are an inquisitive Data Analyst.
            Given the following summary of a dataset, create a list of 3 analytical questions, following these rules:
            Rules
            -----
            1. The questions must be answerable through simple Pandas operations with only the given data.
            2. Your response must only include the three questions in a numbered list. Do not include explanations or caveats before or after.
            3. Ensure the output list is formated: 1. question1, 2. question2, 3. question3
            """},
        {"role":"user","content":summary}
    ]
    response = pipe(messages, temperature = 0.2, max_new_tokens = 1024, return_full_text=False)[0]['generated_text']
    # Split only on list markers such as "1. ": the number must not be glued
    # to a preceding word character or dot, and the dot must not be followed
    # by a digit. This keeps decimals ("0.5") and versions ("v1.1") that
    # appear inside a question from cutting it in half.
    parts = re.split(r'(?<![\w.])\d+\.(?!\d)\s*', response)
    # Strip before filtering so whitespace-only fragments are discarded
    # instead of surviving as empty strings.
    stripped = (part.strip() for part in parts)
    return [question for question in stripped if question]
def CodeGeneratorTool(cols: List[str], query: str) -> str:
    """Build the prompt asking the model for pandas-only code that answers ``query``.

    Args:
        cols: Column names of the DataFrame the generated code will run against.
        query: The user's question, quoted verbatim in the prompt.

    Returns:
        The prompt text (no plotting allowed, result must be assigned to ``result``).
    """
    column_list = ', '.join(cols)
    # Each entry is one line of the prompt; the four-space indent and the
    # leading/trailing blank-ish lines are part of the prompt text.
    prompt_lines = [
        "",
        f"    Given DataFrame `df` with columns: {column_list}",
        "    Write Python code (pandas **only**, no plotting) to answer:",
        f"    \"{query}\"",
        "    Rules",
        "    -----",
        "    1. Use pandas operations on `df` only.",
        "    2. Assign the final result to `result`.",
        "    3. Wrap the snippet in a single ```python code fence (no extra prose).",
        "    ",
    ]
    return "\n".join(prompt_lines)
def CodeExecutionHelper(code: str, df: pd.DataFrame):
    """Run generated code against ``df`` and return its ``result`` variable.

    Args:
        code: Python source to execute. It can refer to ``pd`` and ``df``.
        df: The DataFrame exposed to the code as ``df``.

    Returns:
        The value the code assigned to ``result`` (``None`` if it assigned
        nothing), or the string ``"Error executing code: <message>"`` when the
        code raised an exception.
    """
    # SECURITY NOTE: this executes model-generated code with full interpreter
    # privileges and no sandbox. Review before exposing to untrusted users.
    #
    # A single dict serves as the execution namespace. With separate globals
    # and locals dicts, functions and lambdas defined by the snippet resolve
    # names through globals only and fail with NameError on `df` / `pd`.
    namespace = {"pd": pd, "df": df}
    try:
        exec(code, namespace)
    except Exception as exc:
        return f"Error executing code: {exc}"
    return namespace.get("result")
def CodeExtractorHelper(text: str) -> str:
    """Return the body of the first ```python fenced block in ``text``.

    An empty string is returned when there is no opening ```python fence or
    when the fence is never closed. The extracted body is whitespace-stripped.
    """
    _, opening_fence, after_open = text.partition("```python")
    if not opening_fence:
        return ""
    snippet, closing_fence, _ = after_open.partition("```")
    if not closing_fence:
        return ""
    return snippet.strip()
def ToolSelectorAgent(query: str, df: pd.DataFrame):
    """Have the model write pandas code for ``query`` and return the snippet.

    The prompt comes from ``CodeGeneratorTool`` using the columns of ``df``.
    The model output is passed through ``CodeExtractorHelper``, so the return
    value is the extracted code, or "" when no code block is found.
    """
    column_names = df.columns.tolist()
    # The backslash continuations keep this a single string; the indentation
    # of the continued lines is part of the text sent to the model.
    system_prompt = "detailed thinking off. You are a Python data-analysis expert who writes clean, efficient code. \
            Solve the given problem with optimal pandas operations. Be concise and focused. \
            Your response must contain ONLY a properly-closed ```python code block with no explanations before or after. \
            Ensure your solution is correct, handles edge cases, and follows best practices for data analysis."
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": CodeGeneratorTool(column_names, query)},
    ]
    generations = pipe(
        conversation,
        temperature=0.2,
        max_new_tokens=1024,
        return_full_text=False,
    )
    return CodeExtractorHelper(generations[0]['generated_text'])
def ReasoningPromptGenerator(query: str, result: Any) -> str:
    """Build the prompt asking the model to explain ``result`` for ``query``.

    Args:
        query: The user's question.
        result: The value produced by the executed code, or an
            "Error executing code..." string when execution failed.

    Returns:
        The prompt text.
    """
    failed = isinstance(result, str) and result.startswith("Error executing code")
    # Error messages go into the prompt whole; any other result is rendered
    # with str() and capped at 300 characters.
    desc = result if failed else str(result)[:300]
    return f"""
    The user asked: "{query}".
    The result value is: {desc}
    Explain in 2-3 concise sentences what this tells about the data (no mention of charts)."""
def ReasoningAgent(query: str, result: Any):
    """Ask the model to explain ``result`` and separate out its reasoning trace.

    Args:
        query: The user's question.
        result: The executed code's result, or an error string.

    Returns:
        A ``(response, thinking)`` tuple. When the model output contains
        "</think>", ``thinking`` is the text before the first marker and
        ``response`` is the text after it. Otherwise ``response`` is the
        whole output and ``thinking`` is "".
    """
    prompt = ReasoningPromptGenerator(query, result)
    messages = [
        {"role": "system", "content": \
            "detailed thinking on. You are an insightful data analyst"},
        {"role": "user","content": prompt}
    ]
    response = pipe(messages, temperature = 0.2, max_new_tokens = 1024, return_full_text=False)[0]['generated_text']
    # Default to an empty trace so the return below is valid even when the
    # model emits no "</think>" marker.
    thinking = ""
    if "</think>" in response:
        thinking, response = response.split("</think>", 1)
    return response, thinking
def ResponseBuilderTool(question:str)->str:
    """Answer ``question`` about the session DataFrame and format the reply.

    Generates pandas code for the question, executes it against
    ``st.session_state.df``, asks the model to explain the result, and
    assembles a Markdown/HTML string for the chat.

    Args:
        question: The user's question.

    Returns:
        A string made of a result header, an optional collapsed "Reasoning"
        section, the explanation text, and a collapsed "View code" section.
    """
    import html  # local import: only needed for escaping in this function

    code = ToolSelectorAgent(question, st.session_state.df)
    result = CodeExecutionHelper(code, st.session_state.df)
    reasoning_txt, raw_thinking  = ReasoningAgent(question, result)
    # Backticks are removed from the explanation before it is embedded.
    reasoning_txt = reasoning_txt.replace("`", "")

    # Build assistant response header
    if isinstance(result, pd.DataFrame):
        header = f"Result: {len(result)} rows"
    elif isinstance(result, pd.Series):
        header = "Result series"
    else:
        header = f"Result: {result}"

    # The chat renders this string with HTML enabled, so model-produced text
    # placed inside <pre> must be escaped. Otherwise characters such as "<"
    # in a pandas comparison are parsed as markup and break the output.
    thinking_html = ""
    if raw_thinking:
        thinking_html = (
            '<details class="thinking">'
            '<summary>🧠 Reasoning</summary>'
            f'<pre>{html.escape(raw_thinking, quote=False)}</pre>'
            '</details>'
        )

    # Collapsed section showing the generated code
    code_html = (
        '<details class="code">'
        '<summary>View code</summary>'
        '<pre><code class="language-python">'
        f'{html.escape(code, quote=False)}'
        '</code></pre>'
        '</details>'
    )

    # Combine thinking, explanation, and code accordion
    return f"{header}\n\n{thinking_html}{reasoning_txt}\n\n{code_html}"
 def main():
     """Streamlit App

     Flow:
       1. The user uploads a CSV. It is loaded into ``st.session_state.df``
          and summarized by ``FileDescriptionAgent``.
       2. The summary and a PyGWalker explorer are shown in the main area.
       3. The sidebar hosts the conversation. Before the first question it
          offers suggested questions (from ``AnlaysisQuestionAgent``) plus a
          free-text input; afterwards it shows the chat history and accepts
          follow-up questions, each answered by ``ResponseBuilderTool``.

     ``st.session_state.num_question_asked`` drives the sidebar: 0 means no
     question chosen yet, 1 means the first question is chosen but not yet
     answered, and anything higher means the regular chat loop.
     """
     st.set_page_config(layout="wide")
     st.title("Analytics Agent")

     file = st.file_uploader("Choose CSV", type=["csv"])
     if not file:
         return

     if ("df" not in st.session_state) or (st.session_state.get("current_file") != file.name):
         st.session_state.df = pd.read_csv(file)
         st.session_state.current_file = file.name
         # A different file invalidates the suggested questions and the chat
         # built for the previous dataset, so the conversation starts over.
         for stale_key in ("analsyis_questions", "messages", "num_question_asked", "first_question"):
             if stale_key in st.session_state:
                 del st.session_state[stale_key]
         with st.spinner("Summarizing..."):
             st.session_state.file_summary = FileDescriptionAgent("", st.session_state.df)

     st.markdown("### Data Summary:")
     st.text(st.session_state.file_summary)
     pygApp = get_pyg_renderer(st.session_state.df)
     pygApp.explorer(default_tab="data")
     st.markdown(
         """
             <style>
                 section[data-testid="stSidebar"] {
                     width: 500px !important; /* Set the width to your desired value */
                 }
             </style>
             """,
         unsafe_allow_html=True,
     )

     def _show_history():
         """Render every stored chat message in order."""
         with st.container():
             for msg in st.session_state.messages:
                 with st.chat_message(msg["role"]):
                     st.markdown(msg["content"], unsafe_allow_html=True)

     with st.sidebar:
         st.markdown("## Analysis Discussion:")
         if "first_question" not in st.session_state:
             st.session_state.first_question = ""
         if "num_question_asked" not in st.session_state:
             st.session_state.num_question_asked = 0
         if "messages" not in st.session_state:
             st.session_state.messages = []

         if st.session_state.num_question_asked == 0:
             with st.spinner("Preparing Anlaysis..."):
                 if "analsyis_questions" not in st.session_state:
                     st.session_state.analsyis_questions = AnlaysisQuestionAgent(st.session_state.file_summary)
             with st.container():
                 chosen = None
                 # Loop instead of indexing [0], [1], [2]: the model may
                 # return fewer than three questions. Explicit keys keep the
                 # buttons distinct even if two labels are identical.
                 for idx, suggestion in enumerate(st.session_state.analsyis_questions[:3]):
                     if st.button(suggestion, key=f"suggested_question_{idx}"):
                         chosen = suggestion
                 chat = st.chat_input("Something else...")
                 if chat:
                     # Typed input takes precedence over a suggestion.
                     chosen = chat
                 # Rerun only once a question has been chosen. An
                 # unconditional rerun here restarts the script forever while
                 # the user is still deciding.
                 if chosen:
                     st.session_state.first_question = chosen
                     st.session_state.num_question_asked = 1
                     st.session_state.messages.append({"role": "user", "content": chosen})
                     st.rerun()
         elif st.session_state.num_question_asked == 1:
             # Show the question first, then compute and store the answer.
             _show_history()
             with st.spinner("Working …"):
                 st.session_state.messages.append({
                     "role": "assistant",
                     "content": ResponseBuilderTool(st.session_state.first_question)
                 })
             st.session_state.num_question_asked += 1
             st.rerun()
         else:
             _show_history()
             if user_q := st.chat_input("Ask about your data…"):
                 st.session_state.messages.append({"role": "user", "content": user_q})
                 with st.spinner("Working …"):
                     st.session_state.messages.append({
                         "role": "assistant",
                         "content": ResponseBuilderTool(user_q)
                     })
                 st.session_state.num_question_asked += 1
                 st.rerun()
 # Start the app only when this file is executed as a script, not on import.
 if __name__ == "__main__":
     main()

 import re
 from typing import List, Any
 def main():
     """Streamlit App

     Sets the page layout to "wide" and shows the "Analytics Agent" title.
     This version of the app renders nothing else.
     """
     st.set_page_config(layout="wide")
     st.title("Analytics Agent")
 # Start the app only when this file is executed as a script, not on import.
 if __name__ == "__main__":
     main()