yonkoyonks committed on
Commit
a40fc91
·
verified ·
1 Parent(s): d368d5c

Upload 4 files

Browse files
Files changed (4) hide show
  1. .env +2 -0
  2. portfolio3app.py +19 -0
  3. requirements.txt +6 -3
  4. utils.py +62 -0
.env ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+
2
+ MODEL_PATH=./models/gemma-2b-it.Q2_K.gguf
portfolio3app.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit front-end: upload a CSV and ask the local LLM agent about it."""
import streamlit as st
import pandas as pd

from utils import query_agent

st.title("📊 Local Data Analysis Assistant")
st.write("Upload a CSV and ask questions about your data!")

# Single-file upload widget; only CSVs are accepted.
csv_file = st.file_uploader("Upload CSV", type=["csv"])

if csv_file:
    # Load the upload into a DataFrame and preview the first rows.
    frame = pd.read_csv(csv_file)
    st.dataframe(frame.head())

    question = st.text_input("Ask a question about your dataset:")
    if st.button("Analyze") and question:
        # Show a spinner while the (potentially slow) local model runs.
        with st.spinner("Thinking..."):
            result = query_agent(frame, question)
        st.subheader("Answer:")
        st.write(result)
requirements.txt CHANGED
@@ -1,3 +1,6 @@
1
- altair
2
- pandas
3
- streamlit
 
 
 
 
1
+ streamlit>=1.36.0
2
+ pandas>=2.0
3
+ langchain>=0.2
4
+ langchain-community>=0.2
5
+ python-dotenv>=1.0
6
+ llama-cpp-python>=0.2.90
utils.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from langchain_community.llms import LlamaCpp
from dotenv import load_dotenv
import os
import pandas as pd

# Load environment variables from a local .env file (the repo's .env sets
# MODEL_PATH) so os.getenv below can pick them up.
load_dotenv()
# Filesystem path to the local GGUF model; falls back to the bundled
# Gemma 2B Q2_K quantization when MODEL_PATH is not set.
MODEL_PATH = os.getenv("MODEL_PATH", "./models/gemma-2b-it.Q2_K.gguf")
9
+
10
def summarize_dataframe(df: pd.DataFrame, max_rows: int = 30) -> str:
    """Render *df* as compact text so it fits in the model's context window.

    Lists the column names, then either every row (for small frames) or a
    seeded random sample of ``max_rows`` rows, so repeated calls on the same
    frame produce the same text.
    """
    parts = [f"Columns: {', '.join(df.columns)}\n"]
    if len(df) <= max_rows:
        shown = df
        parts.append("Showing all rows:")
    else:
        # Fixed seed keeps the summary deterministic across calls.
        shown = df.sample(max_rows, random_state=42)
        parts.append("Showing a random sample of rows:")
    parts.append(shown.to_string(index=False))
    return "\n".join(parts)
21
+
22
def query_agent(df: pd.DataFrame, query: str) -> str:
    """Answer a natural-language question about *df*.

    Simple "most common / most frequent value" questions are answered
    directly with pandas (no LLM round-trip). Everything else is sent to
    the local Gemma model together with a summarized view of the data,
    to stay within the context window.

    Args:
        df: The dataset to analyze.
        query: The user's question about the dataset.

    Returns:
        A textual answer.
    """
    query_lower = query.lower()

    # Fast path: frequency questions that name a column can be answered
    # exactly with pandas, skipping the expensive model call.
    try:
        if "most common" in query_lower or "most frequent" in query_lower:
            for col in df.columns:
                if col.lower() in query_lower:
                    modes = df[col].mode()
                    # mode() is empty for an all-NaN/empty column; fall
                    # through to the LLM instead of raising IndexError.
                    if not modes.empty:
                        value = modes.iloc[0]
                        return f"The most common value in column '{col}' is '{value}'."
    except Exception as e:
        # Best-effort shortcut: any failure just falls back to the LLM path.
        print("Direct analysis failed:", e)

    # Otherwise summarize the dataset so the prompt stays small.
    data_text = summarize_dataframe(df)

    prompt = f"""
You are a data analysis assistant with expertise in statistics and data interpretation.

Analyze the dataset sample below and answer the user's question in a **clear, detailed, and well-explained way**.
Include both the **direct answer** and a short **explanation or reasoning** behind it.

Dataset Summary:
{data_text}

Question:
{query}

Answer (with explanation):
"""

    llm = LlamaCpp(
        model_path=MODEL_PATH,
        temperature=0.7,
        # FIX: the LlamaCpp wrapper's parameter is `max_tokens`, not
        # `max_new_tokens` — the old name was not a valid field.
        max_tokens=1024,
        n_ctx=16384,
        verbose=True,
    )

    # FIX: calling the LLM directly (`llm(prompt)`) is deprecated in
    # LangChain 0.2; `invoke` is the supported entry point.
    return llm.invoke(prompt)