yonkoyonks committed on
Commit
a40fc91
·
verified ·
1 Parent(s): d368d5c

Upload 4 files

Browse files
Files changed (4) hide show
  1. .env +2 -0
  2. portfolio3app.py +19 -0
  3. requirements.txt +6 -3
  4. utils.py +62 -0
.env ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+
2
+ MODEL_PATH=./models/gemma-2b-it.Q2_K.gguf
portfolio3app.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit front-end: upload a CSV and ask the local LLM agent about it."""
import streamlit as st
import pandas as pd

from utils import query_agent

st.title("📊 Local Data Analysis Assistant")
st.write("Upload a CSV and ask questions about your data!")

# Single-file upload widget; only CSVs are accepted.
csv_file = st.file_uploader("Upload CSV", type=["csv"])

if csv_file:
    # Load the upload into a DataFrame and preview the first rows.
    frame = pd.read_csv(csv_file)
    st.dataframe(frame.head())

    question = st.text_input("Ask a question about your dataset:")
    if st.button("Analyze") and question:
        # Show a spinner while the (potentially slow) local model runs.
        with st.spinner("Thinking..."):
            result = query_agent(frame, question)
        st.subheader("Answer:")
        st.write(result)
requirements.txt CHANGED
@@ -1,3 +1,6 @@
1
- altair
2
- pandas
3
- streamlit
 
 
 
 
1
+ streamlit>=1.36.0
2
+ pandas>=2.0
3
+ langchain>=0.2
4
+ langchain-community>=0.2
5
+ python-dotenv>=1.0
6
+ llama-cpp-python>=0.2.90
utils.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from langchain_community.llms import LlamaCpp
from dotenv import load_dotenv
import os
import pandas as pd

# Load environment variables from a local .env file (the repo's .env sets
# MODEL_PATH) so os.getenv below can pick them up.
load_dotenv()
# Filesystem path to the local GGUF model; falls back to the bundled
# Gemma 2B Q2_K quantization when MODEL_PATH is not set.
MODEL_PATH = os.getenv("MODEL_PATH", "./models/gemma-2b-it.Q2_K.gguf")
9
+
10
def summarize_dataframe(df: pd.DataFrame, max_rows: int = 30) -> str:
    """Render *df* as compact text so it fits in the model's context window.

    Lists the column names, then either every row (for small frames) or a
    seeded random sample of ``max_rows`` rows, so repeated calls on the same
    frame produce the same text.
    """
    parts = [f"Columns: {', '.join(df.columns)}\n"]
    if len(df) <= max_rows:
        shown = df
        parts.append("Showing all rows:")
    else:
        # Fixed seed keeps the summary deterministic across calls.
        shown = df.sample(max_rows, random_state=42)
        parts.append("Showing a random sample of rows:")
    parts.append(shown.to_string(index=False))
    return "\n".join(parts)
21
+
22
def query_agent(df: pd.DataFrame, query: str) -> str:
    """Answer a natural-language question about *df*.

    Simple "most common / most frequent value" questions are answered
    directly with pandas (no LLM round-trip). Everything else is sent to
    the local Gemma model together with a summarized view of the data,
    to stay within the context window.

    Args:
        df: The dataset to analyze.
        query: The user's question about the dataset.

    Returns:
        A textual answer.
    """
    query_lower = query.lower()

    # Fast path: frequency questions that name a column can be answered
    # exactly with pandas, skipping the expensive model call.
    try:
        if "most common" in query_lower or "most frequent" in query_lower:
            for col in df.columns:
                if col.lower() in query_lower:
                    modes = df[col].mode()
                    # mode() is empty for an all-NaN/empty column; fall
                    # through to the LLM instead of raising IndexError.
                    if not modes.empty:
                        value = modes.iloc[0]
                        return f"The most common value in column '{col}' is '{value}'."
    except Exception as e:
        # Best-effort shortcut: any failure just falls back to the LLM path.
        print("Direct analysis failed:", e)

    # Otherwise summarize the dataset so the prompt stays small.
    data_text = summarize_dataframe(df)

    prompt = f"""
You are a data analysis assistant with expertise in statistics and data interpretation.

Analyze the dataset sample below and answer the user's question in a **clear, detailed, and well-explained way**.
Include both the **direct answer** and a short **explanation or reasoning** behind it.

Dataset Summary:
{data_text}

Question:
{query}

Answer (with explanation):
"""

    llm = LlamaCpp(
        model_path=MODEL_PATH,
        temperature=0.7,
        # FIX: the LlamaCpp wrapper's parameter is `max_tokens`, not
        # `max_new_tokens` — the old name was not a valid field.
        max_tokens=1024,
        n_ctx=16384,
        verbose=True,
    )

    # FIX: calling the LLM directly (`llm(prompt)`) is deprecated in
    # LangChain 0.2; `invoke` is the supported entry point.
    return llm.invoke(prompt)