Sajil Awale committed on
Commit
7381684
·
1 Parent(s): dcb0e39

added multi-user auth feature in fin adv

Browse files
Files changed (4) hide show
  1. app.py +550 -114
  2. mcp_server.py +104 -96
  3. money_rag.py +159 -83
  4. requirements.txt +6 -0
app.py CHANGED
@@ -3,129 +3,565 @@ import asyncio
3
  import os
4
  import json
5
  import plotly.io as pio
 
 
 
6
  from money_rag import MoneyRAG
7
 
8
- st.set_page_config(page_title="MoneyRAG", layout="wide")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
- # Sidebar for Authentication
11
- with st.sidebar:
12
- st.header("Authentication")
13
- provider = st.selectbox("LLM Provider", ["Google", "OpenAI"])
14
-
15
- if provider == "Google":
16
- models = ["gemini-3-flash-preview", "gemini-3-pro-image-preview", "gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.5-flash-lite"]
17
- embeddings = ["gemini-embedding-001"]
18
- else:
19
- models = ["gpt-5-mini", "gpt-5-nano", "gpt-4o-mini", "gpt-4o"]
20
- embeddings = ["text-embedding-3-small", "text-embedding-3-large", "text-embedding-ada-002"]
21
-
22
- model_name = st.selectbox("Choose Decoder Model", models)
23
- embed_name = st.selectbox("Choose Embedding Model", embeddings)
24
- api_key = st.text_input("API Key", type="password")
25
-
26
- auth_button = st.button("Authenticate")
27
- if auth_button and api_key:
28
- st.session_state.rag = MoneyRAG(provider, model_name, embed_name, api_key)
29
- st.success("Authenticated!")
30
-
31
  st.divider()
32
- st.caption("**Contributors:**")
33
- st.caption("👤 [Sajil Awale](https://github.com/AwaleSajil)")
34
- st.caption("👤 [Simran KC](https://github.com/iamsims)")
35
-
36
- # Main Window
37
- st.title("MoneyRAG 💰")
38
- st.subheader("Where is my money?")
39
- st.markdown("""
40
- This app helps you analyze your personal finances using AI.
41
- Upload your bank/credit card CSV statements to chat with your data semantically.
42
- """)
43
-
44
- # Guides Section
45
- col1, col2 = st.columns(2)
46
-
47
- with col1:
48
- with st.expander("📚 How to get API keys"):
49
- st.markdown("**Google Gemini API:**")
50
- st.markdown("🔗 [Get API key from Google AI Studio](https://aistudio.google.com/app/apikey)")
51
- st.markdown("")
52
- st.markdown("**OpenAI API:**")
53
- st.markdown("🔗 [Get API key from OpenAI Platform](https://platform.openai.com/api-keys)")
54
-
55
- with col2:
56
- with st.expander("📥 How to download transaction history"):
57
- st.markdown("**Chase Credit Card:**")
58
- st.video("https://www.youtube.com/watch?v=gtAFaP9Lts8")
59
- st.markdown("")
60
- st.markdown("**Discover Credit Card:**")
61
- st.video("https://www.youtube.com/watch?v=cry6-H5b0PQ")
62
-
63
- # Architecture Diagram
64
- with st.expander("🏗️ How MoneyRAG Works"):
65
- st.image("architecture.svg", use_container_width=True)
66
-
67
- st.divider()
68
-
69
- if "rag" in st.session_state:
70
- uploaded_files = st.file_uploader("Upload CSV transactions", accept_multiple_files=True, type=['csv'])
71
 
72
- if uploaded_files:
73
- if st.button("Ingest Data"):
74
- temp_paths = []
75
- for uploaded_file in uploaded_files:
76
- path = os.path.join(st.session_state.rag.temp_dir, uploaded_file.name)
77
- with open(path, "wb") as f:
78
- f.write(uploaded_file.getbuffer())
79
- temp_paths.append(path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
 
81
- with st.spinner("Ingesting and vectorizing..."):
82
- asyncio.run(st.session_state.rag.setup_session(temp_paths))
83
- st.success("Data ready for chat!")
 
 
 
84
 
85
- # Chat Interface
86
- st.divider()
87
- if "messages" not in st.session_state:
88
- st.session_state.messages = []
89
-
90
- # Helper function to cleverly render either text or a Plotly chart
91
- def render_content(content):
92
- # We might have mixed text and charts delimited by ===CHART=== ... ===ENDCHART===
93
- if isinstance(content, str) and "===CHART===" in content:
94
- parts = content.split("===CHART===")
95
- # Render first text part
96
- st.markdown(parts[0].strip())
97
 
98
- for part in parts[1:]:
99
- if "===ENDCHART===" in part:
100
- chart_json, remaining_text = part.split("===ENDCHART===")
 
 
101
  try:
102
- fig = pio.from_json(chart_json.strip())
103
- st.plotly_chart(fig, use_container_width=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  except Exception as e:
105
- st.error("Failed to render chart.")
106
-
107
- if remaining_text.strip():
108
- st.markdown(remaining_text.strip())
109
- else:
110
- st.markdown(content)
111
-
112
- # Render previous messages
113
- for message in st.session_state.messages:
114
- with st.chat_message(message["role"]):
115
- render_content(message["content"])
116
-
117
- # Handle new user input
118
- if prompt := st.chat_input("Ask about your spending..."):
119
- st.session_state.messages.append({"role": "user", "content": prompt})
120
- with st.chat_message("user"):
121
- st.markdown(prompt)
122
-
123
- with st.chat_message("assistant"):
124
- with st.spinner("Thinking..."):
125
- response = asyncio.run(st.session_state.rag.chat(prompt))
126
- render_content(response)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
 
128
- st.session_state.messages.append({"role": "assistant", "content": response})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
 
130
- else:
131
- st.info("Please authenticate in the sidebar to start.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  import os
4
  import json
5
  import plotly.io as pio
6
+ from supabase import create_client, Client, ClientOptions
7
+ from dotenv import load_dotenv
8
+
9
  from money_rag import MoneyRAG
10
 
11
+ load_dotenv()
12
+
13
+ st.set_page_config(page_title="MoneyRAG", layout="wide", initial_sidebar_state="expanded")
14
+
15
+ # Initialize Supabase Client per request (NO CACHE) to ensure thread-safe auth headers
16
+ def get_supabase() -> Client:
17
+ url = os.environ.get("SUPABASE_URL")
18
+ key = os.environ.get("SUPABASE_KEY")
19
+ if "access_token" in st.session_state:
20
+ opts = ClientOptions(headers={"Authorization": f"Bearer {st.session_state.access_token}"})
21
+ return create_client(url, key, options=opts)
22
+ return create_client(url, key)
23
+
24
+ supabase = get_supabase()
25
+
26
+ def inject_css():
27
+ st.html("""
28
+ <link rel="preconnect" href="https://fonts.googleapis.com">
29
+ <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700;800&display=swap" rel="stylesheet">
30
+ <style>
31
+ /* ── Global Reset & Font ── */
32
+ html, body, [class*="css"] {
33
+ font-family: 'Inter', sans-serif !important;
34
+ }
35
+ #MainMenu, footer, header { visibility: hidden; }
36
+ .block-container { padding-top: 2rem !important; }
37
+
38
+ /* ── Background ── */
39
+ .stApp {
40
+ background: #0a0a0f;
41
+ color: #e2e8f0;
42
+ }
43
+
44
+ /* ── Sidebar ── */
45
+ [data-testid="stSidebar"] {
46
+ background: linear-gradient(180deg, #0f0f1a 0%, #0d0d16 100%) !important;
47
+ border-right: 1px solid rgba(99,102,241,0.15) !important;
48
+ }
49
+ [data-testid="stSidebar"] * { color: #cbd5e1 !important; }
50
+
51
+ /* ── Nav buttons ── */
52
+ div[data-testid="stSidebarContent"] .nav-btn > div > button {
53
+ width: 100% !important;
54
+ text-align: left !important;
55
+ border: none !important;
56
+ border-radius: 10px !important;
57
+ background: transparent !important;
58
+ color: #94a3b8 !important;
59
+ padding: 0.65rem 1rem !important;
60
+ font-size: 0.9rem !important;
61
+ font-weight: 500 !important;
62
+ transition: all 0.2s ease !important;
63
+ margin-bottom: 2px !important;
64
+ }
65
+ div[data-testid="stSidebarContent"] .nav-btn > div > button:hover {
66
+ background: rgba(99,102,241,0.1) !important;
67
+ color: #a5b4fc !important;
68
+ }
69
+ div[data-testid="stSidebarContent"] .nav-btn-active > div > button {
70
+ background: linear-gradient(135deg, rgba(99,102,241,0.25), rgba(139,92,246,0.2)) !important;
71
+ color: #a5b4fc !important;
72
+ border: 1px solid rgba(99,102,241,0.3) !important;
73
+ font-weight: 600 !important;
74
+ }
75
+
76
+ /* ── Primary Buttons ── */
77
+ .stButton > button[kind="primary"] {
78
+ background: linear-gradient(135deg, #6366f1, #8b5cf6) !important;
79
+ border: none !important;
80
+ border-radius: 10px !important;
81
+ color: white !important;
82
+ font-weight: 600 !important;
83
+ padding: 0.6rem 1.2rem !important;
84
+ transition: all 0.2s ease !important;
85
+ box-shadow: 0 4px 15px rgba(99,102,241,0.3) !important;
86
+ }
87
+ .stButton > button[kind="primary"]:hover {
88
+ transform: translateY(-1px) !important;
89
+ box-shadow: 0 6px 20px rgba(99,102,241,0.45) !important;
90
+ }
91
+
92
+ /* ── Secondary Buttons ── */
93
+ .stButton > button[kind="secondary"] {
94
+ background: rgba(255,255,255,0.05) !important;
95
+ border: 1px solid rgba(255,255,255,0.1) !important;
96
+ border-radius: 10px !important;
97
+ color: #cbd5e1 !important;
98
+ font-weight: 500 !important;
99
+ transition: all 0.2s ease !important;
100
+ }
101
+ .stButton > button[kind="secondary"]:hover {
102
+ background: rgba(255,255,255,0.08) !important;
103
+ border-color: rgba(99,102,241,0.35) !important;
104
+ }
105
+
106
+ /* ── Inputs ── */
107
+ .stTextInput input, .stSelectbox > div > div {
108
+ background: rgba(255,255,255,0.04) !important;
109
+ border: 1px solid rgba(255,255,255,0.1) !important;
110
+ border-radius: 10px !important;
111
+ color: #e2e8f0 !important;
112
+ transition: border 0.2s ease !important;
113
+ }
114
+ .stTextInput input:focus { border-color: #6366f1 !important; box-shadow: 0 0 0 2px rgba(99,102,241,0.2) !important; }
115
+
116
+ /* ── Glass Cards ── */
117
+ .glass-card {
118
+ background: rgba(255,255,255,0.04);
119
+ border: 1px solid rgba(255,255,255,0.08);
120
+ border-radius: 16px;
121
+ padding: 1.75rem;
122
+ backdrop-filter: blur(12px);
123
+ transition: border 0.2s ease;
124
+ }
125
+ .glass-card:hover { border-color: rgba(99,102,241,0.25); }
126
+
127
+ /* ── Hero ── */
128
+ .hero { text-align: center; padding: 4rem 1rem 2rem; }
129
+ .hero .badge {
130
+ display: inline-block;
131
+ background: linear-gradient(135deg, rgba(99,102,241,0.2), rgba(139,92,246,0.2));
132
+ border: 1px solid rgba(99,102,241,0.35);
133
+ color: #a5b4fc;
134
+ font-size: 0.78rem;
135
+ font-weight: 600;
136
+ letter-spacing: 0.1em;
137
+ text-transform: uppercase;
138
+ padding: 0.3rem 0.9rem;
139
+ border-radius: 99px;
140
+ margin-bottom: 1.25rem;
141
+ }
142
+ .hero h1 {
143
+ font-size: clamp(2.5rem, 6vw, 4rem);
144
+ font-weight: 800;
145
+ letter-spacing: -2px;
146
+ line-height: 1.1;
147
+ background: linear-gradient(135deg, #e2e8f0 30%, #a5b4fc);
148
+ -webkit-background-clip: text;
149
+ -webkit-text-fill-color: transparent;
150
+ margin-bottom: 1rem;
151
+ }
152
+ .hero p { font-size: 1.1rem; color: #64748b; max-width: 440px; margin: 0 auto; line-height: 1.7; }
153
+
154
+ /* ── Divider ── */
155
+ hr { border-color: rgba(255,255,255,0.07) !important; }
156
+
157
+ /* ── Expanders ── */
158
+ [data-testid="stExpander"] {
159
+ background: rgba(255,255,255,0.03) !important;
160
+ border: 1px solid rgba(255,255,255,0.07) !important;
161
+ border-radius: 12px !important;
162
+ }
163
+
164
+ /* ── Alerts ── */
165
+ [data-testid="stAlert"] { border-radius: 10px !important; }
166
+
167
+ /* ── Chat bubbles ── */
168
+ [data-testid="stChatMessage"] { border-radius: 12px !important; }
169
+ </style>
170
+ """)
171
+
172
+ def login_register_page():
173
+ inject_css()
174
+
175
+ st.html("""
176
+ <div class="hero">
177
+ <div class="badge">✦ AI-Powered Finance</div>
178
+ <h1>MoneyRAG</h1>
179
+ <p>Your personal finance analyst. Upload bank statements, ask questions, get insights — powered by AI.</p>
180
+ </div>
181
+ """)
182
+
183
+ col_l, col1, col2, col_r = st.columns([1, 2, 2, 1])
184
+
185
+ with col1:
186
+ st.markdown('<div class="glass-card">', unsafe_allow_html=True)
187
+ st.markdown("### Sign In")
188
+ email = st.text_input("Email", key="login_email", placeholder="you@example.com", label_visibility="collapsed")
189
+ password = st.text_input("Password", type="password", key="login_pass", placeholder="Password", label_visibility="collapsed")
190
+ if st.button("Sign In →", use_container_width=True, type="primary"):
191
+ if email and password:
192
+ with st.spinner(""):
193
+ try:
194
+ res = supabase.auth.sign_in_with_password({"email": email, "password": password})
195
+ st.session_state.user = res.user
196
+ st.session_state.access_token = res.session.access_token
197
+ st.query_params["t"] = res.session.access_token
198
+ try:
199
+ supabase.table("User").upsert({
200
+ "id": res.user.id,
201
+ "email": email,
202
+ "hashed_password": "managed_by_supabase_auth"
203
+ }).execute()
204
+ except Exception as sync_e:
205
+ print(f"Warning: Could not sync user: {sync_e}")
206
+ st.rerun()
207
+ except Exception as e:
208
+ st.error(f"Login failed: {e}")
209
+ st.markdown('</div>', unsafe_allow_html=True)
210
+
211
+ with col2:
212
+ st.markdown('<div class="glass-card">', unsafe_allow_html=True)
213
+ st.markdown("### Create Account")
214
+ reg_email = st.text_input("Email", key="reg_email", placeholder="you@example.com", label_visibility="collapsed")
215
+ reg_password = st.text_input("Password", type="password", key="reg_pass", placeholder="Password", label_visibility="collapsed")
216
+ if st.button("Create Account →", use_container_width=True):
217
+ if reg_email and reg_password:
218
+ with st.spinner(""):
219
+ try:
220
+ res = supabase.auth.sign_up({"email": reg_email, "password": reg_password})
221
+ if res.user:
222
+ try:
223
+ supabase.table("User").upsert({
224
+ "id": res.user.id, "email": reg_email,
225
+ "hashed_password": "managed_by_supabase_auth"
226
+ }).execute()
227
+ except Exception:
228
+ pass
229
+ st.success("Account created! Sign in on the left.")
230
+ except Exception as e:
231
+ st.error(f"Signup failed: {str(e)}")
232
+ st.markdown('</div>', unsafe_allow_html=True)
233
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234
  st.divider()
235
+ col3, col4, col5 = st.columns(3)
236
+ with col3:
237
+ with st.expander("📚 API Keys"):
238
+ st.markdown("**Google:** [AI Studio](https://aistudio.google.com/app/apikey)")
239
+ st.markdown("**OpenAI:** [Platform](https://platform.openai.com/api-keys)")
240
+ with col4:
241
+ with st.expander("📥 Export Transactions"):
242
+ st.markdown("**Chase:** [Video guide](https://www.youtube.com/watch?v=gtAFaP9Lts8)")
243
+ st.markdown("**Discover:** [Video guide](https://www.youtube.com/watch?v=cry6-H5b0PQ)")
244
+ with col5:
245
+ with st.expander("🏗️ Architecture"):
246
+ st.image("architecture.svg", use_container_width=True)
247
+
248
+ def load_user_config():
249
+ try:
250
+ # Always get a fresh client with the current auth token
251
+ client = get_supabase()
252
+ res = client.table("AccountConfig").select("*").eq("user_id", st.session_state.user.id).execute()
253
+ if res.data:
254
+ return res.data[0]
255
+ except Exception as e:
256
+ print(f"Failed to load config: {e}")
257
+ return None
258
+
259
+ def main_app_view():
260
+ inject_css()
 
 
 
 
 
 
 
 
 
 
 
 
 
261
 
262
+ # Use session state for active nav tab
263
+ if "nav" not in st.session_state:
264
+ st.session_state.nav = "Chat"
265
+
266
+ with st.sidebar:
267
+ st.markdown(f"**MoneyRAG** 💰")
268
+ st.caption(st.session_state.user.email)
269
+ st.divider()
270
+
271
+ # Modern nav buttons using st.button styled via CSS
272
+ for label, icon in [("Chat", "💬"), ("Ingest Data", "📥"), ("Account Config", "⚙️")]:
273
+ is_active = st.session_state.nav == label
274
+ css_class = "nav-btn-active" if is_active else "nav-btn"
275
+ st.markdown(f'<div class="{css_class}">', unsafe_allow_html=True)
276
+ if st.button(f"{icon} {label}", key=f"nav_{label}", use_container_width=True):
277
+ st.session_state.nav = label
278
+ st.rerun()
279
+ st.markdown('</div>', unsafe_allow_html=True)
280
+
281
+ st.divider()
282
+ if st.button("Log Out", use_container_width=True):
283
+ supabase.auth.sign_out()
284
+ if "t" in st.query_params:
285
+ del st.query_params["t"]
286
+ for key in list(st.session_state.keys()):
287
+ del st.session_state[key]
288
+ st.rerun()
289
+
290
+ st.divider()
291
+ st.caption("[Sajil Awale](https://github.com/AwaleSajil) · [Simran KC](https://github.com/iamsims)")
292
+
293
+ nav = st.session_state.nav
294
+
295
+ # Always reload config fresh (cached None from unauthenticated loads will persist otherwise)
296
+ config = load_user_config()
297
+
298
+ if nav == "Account Config":
299
+ st.header("⚙️ Account Configuration")
300
+ st.write("Configure your AI providers and models here.")
301
+
302
+ current_provider = config['llm_provider'] if config else "Google"
303
+ current_key = config['api_key'] if config else ""
304
+ current_decode = config.get('decode_model', "gemini-3-flash-preview") if config else "gemini-3-flash-preview"
305
+ current_embed = config.get('embedding_model', "gemini-embedding-001") if config else "gemini-embedding-001"
306
+ # Provider Selection - Default to Google
307
+ provider = st.selectbox("LLM Provider", ["Google", "OpenAI"], index=0 if (not config or config['llm_provider'] == "Google") else 1)
308
+
309
+ if provider == "Google":
310
+ models = ["gemini-3-flash-preview", "gemini-3-pro-image-preview", "gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.5-flash-lite"]
311
+ embeddings = ["gemini-embedding-001"]
312
+ else:
313
+ models = ["gpt-5-mini", "gpt-5-nano", "gpt-4o-mini", "gpt-4o"]
314
+ embeddings = ["text-embedding-3-small", "text-embedding-3-large", "text-embedding-ada-002"]
315
+
316
+ with st.form("config_form"):
317
+ api_key = st.text_input("API Key", type="password", value=current_key)
318
 
319
+ col1, col2 = st.columns(2)
320
+ with col1:
321
+ # Default to gemini-3 if no config exists
322
+ m_default_val = current_decode if config else "gemini-3-flash-preview"
323
+ m_idx = models.index(m_default_val) if m_default_val in models else 0
324
+ final_decode = st.selectbox("Select Model", models, index=m_idx)
325
 
326
+ with col2:
327
+ e_idx = embeddings.index(current_embed) if (config and current_embed in embeddings) else 0
328
+ final_embed = st.selectbox("Select Embedding Model", embeddings, index=e_idx)
 
 
 
 
 
 
 
 
 
329
 
330
+ submitted = st.form_submit_button("Save Configuration", type="primary", use_container_width=True)
331
+ if submitted:
332
+ if not api_key:
333
+ st.error("API Key is required.")
334
+ else:
335
  try:
336
+ record = {
337
+ "user_id": st.session_state.user.id,
338
+ "llm_provider": provider,
339
+ "api_key": api_key,
340
+ "decode_model": final_decode,
341
+ "embedding_model": final_embed
342
+ }
343
+ if config:
344
+ supabase.table("AccountConfig").update(record).eq("id", config['id']).execute()
345
+ else:
346
+ supabase.table("AccountConfig").insert(record).execute()
347
+
348
+ st.session_state.user_config = load_user_config()
349
+ # Reinitialize RAG with new config
350
+ if "rag" in st.session_state:
351
+ del st.session_state.rag
352
+
353
+ st.success("Configuration saved successfully!")
354
  except Exception as e:
355
+ st.error(f"Failed to save configuration: {e}")
356
+
357
+ elif nav == "Ingest Data":
358
+ st.header("📥 Ingest Data")
359
+
360
+ uploaded_files = st.file_uploader("Upload CSV transactions", accept_multiple_files=True, type=['csv'])
361
+ if uploaded_files:
362
+ if st.button("Ingest Selected Files", type="primary"):
363
+ if not config:
364
+ st.error("Please set up your Account Config first!")
365
+ return
366
+
367
+ # Initialize RAG if needed
368
+ if "rag" not in st.session_state:
369
+ st.session_state.rag = MoneyRAG(
370
+ llm_provider=config["llm_provider"],
371
+ model_name=config.get("decode_model", "gemini-2.5-pro"),
372
+ embedding_model_name=config.get("embedding_model", "gemini-embedding-001"),
373
+ api_key=config["api_key"],
374
+ user_id=st.session_state.user.id,
375
+ access_token=st.session_state.access_token
376
+ )
377
+
378
+ csv_files_info = []
379
+ user_id = st.session_state.user.id
380
+
381
+ with st.spinner("Uploading to Supabase Storage & Processing..."):
382
+ for uploaded_file in uploaded_files:
383
+ # 1. Save temp locally for pandas parsing
384
+ local_path = os.path.join(st.session_state.rag.temp_dir, uploaded_file.name)
385
+ with open(local_path, "wb") as f:
386
+ f.write(uploaded_file.getbuffer())
387
+
388
+ # 2. Upload raw file to Supabase Object Storage
389
+ s3_key = f"{user_id}/csvs/{uploaded_file.name}"
390
+ try:
391
+ supabase.storage.from_("money-rag-files").upload(
392
+ file=local_path,
393
+ path=s3_key,
394
+ file_options={"content-type": "text/csv", "upsert": "true"}
395
+ )
396
+
397
+ # 3. Log the upload in the CSVFile table
398
+ csv_record = supabase.table("CSVFile").insert({
399
+ "user_id": user_id,
400
+ "filename": uploaded_file.name,
401
+ "s3_key": s3_key
402
+ }).execute()
403
+
404
+ csv_id = csv_record.data[0]['id']
405
+ csv_files_info.append({"path": local_path, "csv_id": csv_id})
406
+
407
+ except Exception as e:
408
+ st.error(f"Error uploading {uploaded_file.name}: {e}")
409
+ continue
410
+
411
+ # 4. Trigger the LLM parsing, routing CSV data to Supabase Postgres
412
+ if csv_files_info:
413
+ asyncio.run(st.session_state.rag.setup_session(csv_files_info))
414
+ st.success("Data uploaded, parsed, and vectorized securely!")
415
+ st.rerun()
416
+
417
+ st.divider()
418
+ st.subheader("Your Uploaded Files")
419
+ try:
420
+ res = supabase.table("CSVFile").select("*").eq("user_id", st.session_state.user.id).execute()
421
+ files = res.data
422
+
423
+ if not files:
424
+ st.info("No files uploaded yet.")
425
+ else:
426
+ for f in files:
427
+ col_file, col_del = st.columns([4, 1])
428
+ with col_file:
429
+ st.write(f"📄 **{f['filename']}** (Uploaded: {f['upload_date'][:10]})")
430
+ with col_del:
431
+ if st.button("Delete", key=f"del_{f['id']}"):
432
+ st.session_state[f"confirm_del_{f['id']}"] = True
433
+
434
+ if st.session_state.get(f"confirm_del_{f['id']}", False):
435
+ st.warning("Are you sure? This permanently deletes the file from Cloud Storage, the SQL Database, and the Vector Index.")
436
+ col_y, col_n = st.columns(2)
437
+ with col_y:
438
+ if st.button("Yes, Delete", key=f"yes_{f['id']}", type="primary"):
439
+ with st.spinner("Purging file data..."):
440
+ try:
441
+ # Delete from storage
442
+ supabase.storage.from_("money-rag-files").remove([f['s3_key']])
443
+ except Exception as e:
444
+ print(f"Warning storage delete failed: {e}")
445
+
446
+ # Use initialized RAG to delete from Vectors and Postgres
447
+ if "rag" not in st.session_state and config:
448
+ st.session_state.rag = MoneyRAG(
449
+ llm_provider=config["llm_provider"],
450
+ model_name=config.get("decode_model", "gemini-2.5-pro"),
451
+ embedding_model_name=config.get("embedding_model", "gemini-embedding-001"),
452
+ api_key=config["api_key"],
453
+ user_id=st.session_state.user.id,
454
+ access_token=st.session_state.access_token
455
+ )
456
+ if "rag" in st.session_state:
457
+ asyncio.run(st.session_state.rag.delete_file(f['id']))
458
+ else:
459
+ # Fallback if no RAG config to just delete from Postgres at least
460
+ supabase.table("Transaction").delete().eq("source_csv_id", f['id']).execute()
461
+ supabase.table("CSVFile").delete().eq("id", f['id']).execute()
462
+
463
+ del st.session_state[f"confirm_del_{f['id']}"]
464
+ st.success(f"Deleted {f['filename']}!")
465
+ st.rerun()
466
+
467
+ with col_n:
468
+ if st.button("Cancel", key=f"cancel_{f['id']}"):
469
+ del st.session_state[f"confirm_del_{f['id']}"]
470
+ st.rerun()
471
+
472
+ except Exception as e:
473
+ st.error(f"Failed to load files: {e}")
474
+
475
+ elif nav == "Chat":
476
+ st.header("💬 Financial Assistant")
477
+ if not config:
478
+ st.warning("Please configure your Account Config (API Key) first!")
479
+ return
480
+
481
+ if "rag" not in st.session_state:
482
+ st.session_state.rag = MoneyRAG(
483
+ llm_provider=config["llm_provider"],
484
+ model_name=config.get("decode_model", "gemini-2.5-pro"),
485
+ embedding_model_name=config.get("embedding_model", "gemini-embedding-001"),
486
+ api_key=config["api_key"],
487
+ user_id=st.session_state.user.id,
488
+ access_token=st.session_state.access_token
489
+ )
490
+
491
+ if "messages" not in st.session_state:
492
+ st.session_state.messages = []
493
+
494
+ # Show file ingestion status
495
+ try:
496
+ client = get_supabase()
497
+ files_res = client.table("CSVFile").select("id, filename").eq("user_id", st.session_state.user.id).execute()
498
+ file_count = len(files_res.data) if files_res.data else 0
499
+ if file_count == 0:
500
+ st.warning("⚠️ No data loaded yet. Go to **Ingest Data** to upload a CSV file before chatting.")
501
+ else:
502
+ names = ", ".join(f['filename'] for f in files_res.data[:3])
503
+ suffix = f" + {file_count - 3} more" if file_count > 3 else ""
504
+ st.info(f"📊 **{file_count} file{'s' if file_count > 1 else ''} loaded:** {names}{suffix}")
505
+ except Exception:
506
+ pass # Don't break chat if the status check fails
507
+
508
+
509
+ # Helper function to cleverly render either text or a Plotly chart
510
+ def render_content(content):
511
+ if isinstance(content, str) and "===CHART===" in content:
512
+ parts = content.split("===CHART===")
513
+ st.markdown(parts[0].strip())
514
 
515
+ for part in parts[1:]:
516
+ if "===ENDCHART===" in part:
517
+ chart_json, remaining_text = part.split("===ENDCHART===")
518
+ try:
519
+ fig = pio.from_json(chart_json.strip())
520
+ st.plotly_chart(fig, use_container_width=True)
521
+ except Exception as e:
522
+ st.error("Failed to render chart.")
523
+
524
+ if remaining_text.strip():
525
+ st.markdown(remaining_text.strip())
526
+ else:
527
+ st.markdown(content)
528
+
529
+ # Render previous messages
530
+ for message in st.session_state.messages:
531
+ with st.chat_message(message["role"]):
532
+ render_content(message["content"])
533
+
534
+ # Handle new user input
535
+ if prompt := st.chat_input("Ask about your spending..."):
536
+ st.session_state.messages.append({"role": "user", "content": prompt})
537
+ with st.chat_message("user"):
538
+ st.markdown(prompt)
539
+
540
+ with st.chat_message("assistant"):
541
+ with st.spinner("Thinking..."):
542
+ try:
543
+ response = asyncio.run(st.session_state.rag.chat(prompt))
544
+ render_content(response)
545
+ st.session_state.messages.append({"role": "assistant", "content": response})
546
+ except Exception as e:
547
+ st.error(f"Error during chat: {e}")
548
 
549
+ if __name__ == "__main__":
550
+ # Attempt to restore session from query params if page was refreshed
551
+ if "user" not in st.session_state:
552
+ token_from_url = st.query_params.get("t")
553
+ if token_from_url:
554
+ try:
555
+ res = supabase.auth.get_user(token_from_url)
556
+ if res and res.user:
557
+ st.session_state.user = res.user
558
+ st.session_state.access_token = token_from_url
559
+ except Exception:
560
+ # Token is invalid/expired - clear it from the URL too
561
+ if "t" in st.query_params:
562
+ del st.query_params["t"]
563
+
564
+ if "user" not in st.session_state:
565
+ login_register_page()
566
+ else:
567
+ main_app_view()
mcp_server.py CHANGED
@@ -6,55 +6,66 @@ from qdrant_client import QdrantClient
6
  from langchain_google_genai import GoogleGenerativeAIEmbeddings
7
  from dotenv import load_dotenv
8
  import os
 
9
 
10
  import shutil
11
 
 
 
12
  # Load environment variables (API keys, etc.)
13
  load_dotenv()
14
 
15
  # Define paths to your data
16
- # For Hugging Face Spaces (Ephemeral):
17
- # We use a temporary directory that gets wiped on restart.
18
- # If DATA_DIR is set (e.g., by your deployment config), use it.
19
  DATA_DIR = os.getenv("DATA_DIR", os.path.join(os.path.dirname(os.path.abspath(__file__)), "temp_data"))
20
- QDRANT_PATH = os.path.join(DATA_DIR, "qdrant_db")
21
- DB_PATH = os.path.join(DATA_DIR, "money_rag.db")
22
 
23
  # Initialize the MCP Server
24
  mcp = FastMCP("Money RAG Financial Analyst")
25
 
26
- import sqlite3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
  def get_schema_info() -> str:
29
- """Get database schema information."""
30
- if not os.path.exists(DB_PATH):
31
- return "Database file does not exist yet. Please upload data."
32
-
33
- try:
34
- conn = sqlite3.connect(DB_PATH)
35
- cursor = conn.cursor()
36
-
37
- # Get all tables
38
- cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
39
- tables = cursor.fetchall()
40
-
41
- schema_info = []
42
- for (table_name,) in tables:
43
- schema_info.append(f"\nTable: {table_name}")
44
-
45
- # Get column info for each table
46
- cursor.execute(f"PRAGMA table_info({table_name});")
47
- columns = cursor.fetchall()
48
-
49
- schema_info.append("Columns:")
50
- for col in columns:
51
- col_id, col_name, col_type, not_null, default_val, pk = col
52
- schema_info.append(f" - {col_name} ({col_type})")
53
-
54
- conn.close()
55
- return "\n".join(schema_info)
56
- except Exception as e:
57
- return f"Error reading schema: {e}"
58
 
59
 
60
  @mcp.resource("schema://database/tables")
@@ -64,7 +75,17 @@ def get_database_schema() -> str:
64
 
65
  @mcp.tool()
66
  def query_database(query: str) -> str:
67
- """Execute a SELECT query on the money_rag SQLite database.
 
 
 
 
 
 
 
 
 
 
68
 
69
  Args:
70
  query: The SQL SELECT query to execute
@@ -78,33 +99,32 @@ def query_database(query: str) -> str:
78
  - 'amount' column: positive values = spending, negative values = payments/refunds
79
 
80
  Example queries:
81
- - Find Walmart spending: SELECT SUM(amount) FROM transactions WHERE description LIKE '%Walmart%' AND amount > 0;
82
- - List recent transactions: SELECT transaction_date, description, amount, category FROM transactions ORDER BY transaction_date DESC LIMIT 5;
83
- - Spending by category: SELECT category, SUM(amount) FROM transactions WHERE amount > 0 GROUP BY category;
84
  """
85
- if not os.path.exists(DB_PATH):
86
- return "Database file does not exist yet. Please upload data."
87
-
88
  # Security: Only allow SELECT queries
89
  query_upper = query.strip().upper()
90
- if not query_upper.startswith("SELECT") and not query_upper.startswith("PRAGMA"):
91
- return "Error: Only SELECT and PRAGMA queries are allowed"
92
 
93
  # Forbidden operations
94
- forbidden = ["INSERT", "UPDATE", "DELETE", "DROP", "ALTER", "CREATE", "REPLACE", "TRUNCATE", "ATTACH", "DETACH"]
95
- # Check for forbidden words as standalone words to avoid false positives (e.g. "update_date" column)
96
- # Simple check: space-surrounded or end-of-string
97
  if any(f" {word} " in f" {query_upper} " for word in forbidden):
98
  return f"Error: Query contains forbidden operation. Only SELECT queries allowed."
99
 
 
 
 
 
100
  try:
101
- conn = sqlite3.connect(DB_PATH)
102
  cursor = conn.cursor()
103
  cursor.execute(query)
104
  results = cursor.fetchall()
105
 
106
  # Get column names to make result more readable
107
- column_names = [description[0] for description in cursor.description] if cursor.description else []
108
 
109
  conn.close()
110
 
@@ -118,28 +138,20 @@ def query_database(query: str) -> str:
118
  formatted_results.append(str(row))
119
 
120
  return "\n".join(formatted_results)
121
- except sqlite3.Error as e:
122
- return f"Error: {str(e)}"
123
 
124
  def get_vector_store():
125
  """Initialize connection to the Qdrant vector store"""
126
  # Initialize Embedding Model using Google AI Studio
127
- embeddings = GoogleGenerativeAIEmbeddings(model="text-embedding-004")
128
 
129
- # Connect to Qdrant (Persistent Disk Mode at specific path)
130
- # We ensure the directory exists so Qdrant can write to it.
131
- os.makedirs(QDRANT_PATH, exist_ok=True)
132
-
133
- client = QdrantClient(path=QDRANT_PATH)
134
-
135
- # Check if collection exists (it might be empty in a new ephemeral session)
136
- collections = client.get_collections().collections
137
- collection_names = [c.name for c in collections]
138
 
139
- if "transactions" not in collection_names:
140
- # In a real app, you would probably trigger ingestion here or handle the empty state
141
- pass
142
-
143
  return QdrantVectorStore(
144
  client=client,
145
  collection_name="transactions",
@@ -159,20 +171,22 @@ def semantic_search(query: str, top_k: int = 5) -> str:
159
  top_k: Number of results to return (default 5).
160
  """
161
  try:
 
162
  vector_store = get_vector_store()
163
 
164
- # Safety check: if no data has been ingested yet
165
- if not os.path.exists(QDRANT_PATH) or not os.listdir(QDRANT_PATH):
166
- return "No matching transactions found (Database is empty. Please upload data first)."
 
 
167
 
168
- results = vector_store.similarity_search(query, k=top_k)
169
 
170
  if not results:
171
  return "No matching transactions found."
172
 
173
  output = []
174
  for doc in results:
175
- # Format the output clearly for the LLM/User
176
  amount = doc.metadata.get('amount', 'N/A')
177
  date = doc.metadata.get('transaction_date', 'N/A')
178
  output.append(f"Date: {date} | Match: {doc.page_content} | Amount: {amount}")
@@ -184,25 +198,29 @@ def semantic_search(query: str, top_k: int = 5) -> str:
184
 
185
 
186
  @mcp.tool()
187
- def generate_interactive_chart(sql_query: str, chart_type: str, x_col: str, y_col: str, title: str) -> str:
188
  """
189
- Generate an interactive Plotly chart from the money_rag SQLite database.
190
- Use this proactively whenever a visual representation of data would be helpful.
191
-
192
- CRITICAL INSTRUCTIONS:
193
- 1. Write a valid SQLite SELECT query.
194
- 2. Aggregate data appropriately (e.g., use GROUP BY for pie/bar charts).
195
- 3. Pass the exact column names from your query to x_col and y_col.
196
-
197
  Args:
198
- sql_query: The SQL SELECT query (e.g. "SELECT category, SUM(amount) as total FROM transactions GROUP BY category")
199
- chart_type: Must be exactly "bar", "pie", or "line"
200
- x_col: Column name from query for X-axis (or labels for pie)
201
- y_col: Column name from query for Y-axis (or values for pie)
202
- title: Title of the chart
 
 
 
 
 
203
  """
204
  try:
205
- conn = sqlite3.connect(DB_PATH)
 
 
 
 
206
  df = pd.read_sql_query(sql_query, conn)
207
  conn.close()
208
  if df.empty:
@@ -226,17 +244,7 @@ def generate_interactive_chart(sql_query: str, chart_type: str, x_col: str, y_co
226
  return f'{{"error": "Failed to generate chart: {str(e)}"}}'
227
 
228
 
229
- # A helper to clear data (useful for session reset)
230
- @mcp.tool()
231
- def clear_database() -> str:
232
- """Clear all stored transaction data to reset the session."""
233
- try:
234
- if os.path.exists(DATA_DIR):
235
- shutil.rmtree(DATA_DIR)
236
- os.makedirs(DATA_DIR)
237
- return "Database cleared successfully."
238
- except Exception as e:
239
- return f"Error clearing database: {e}"
240
 
241
  if __name__ == "__main__":
242
  # Runs the server over stdio
 
6
  from langchain_google_genai import GoogleGenerativeAIEmbeddings
7
  from dotenv import load_dotenv
8
  import os
9
+ from typing import Optional
10
 
11
  import shutil
12
 
13
+ from textwrap import dedent
14
+
15
  # Load environment variables (API keys, etc.)
16
  load_dotenv()
17
 
18
  # Define paths to your data
 
 
 
19
  DATA_DIR = os.getenv("DATA_DIR", os.path.join(os.path.dirname(os.path.abspath(__file__)), "temp_data"))
 
 
20
 
21
  # Initialize the MCP Server
22
  mcp = FastMCP("Money RAG Financial Analyst")
23
 
24
+ import psycopg2
25
+ from supabase import create_client, Client
26
+
27
def get_db_connection():
    """Open and return a raw psycopg2 connection to the Supabase Postgres DB.

    We deliberately connect with psycopg2 rather than the Supabase client so
    the LLM's raw SQL can be executed directly. The connection string comes
    from the DATABASE_URL environment variable (Supabase exposes it in the
    dashboard under Database Settings).

    Raises:
        ValueError: if DATABASE_URL is missing or empty.
    """
    connection_string = os.environ.get("DATABASE_URL")
    if not connection_string:
        raise ValueError("DATABASE_URL must be defined to construct raw SQL connections.")
    return psycopg2.connect(connection_string)
37
+
38
def get_current_user_id() -> str:
    """Return the id of the user this MCP subprocess is serving.

    The parent process injects CURRENT_USER_ID into this server's environment
    when spawning it; every tool uses the value to scope data access to a
    single tenant.

    Raises:
        ValueError: if CURRENT_USER_ID is missing or empty.
    """
    uid = os.environ.get("CURRENT_USER_ID")
    if not uid:
        raise ValueError("CURRENT_USER_ID not injected into MCP environment!")
    return uid
43
 
44
def get_schema_info() -> str:
    """Return a textual description of the Postgres schema for the LLM.

    The text carries a hard multi-tenancy rule: every generated query must
    filter on user_id. NOTE(review): '{current_user_id}' below is a literal
    placeholder — this string is never interpolated here; the query_database
    tool enforces the real id at execution time.

    Fix: the column list now includes `enriched_info` (documented by the
    query_database tool) and `source_csv_id` (written during CSV ingestion),
    which were missing and made the schema inconsistent with the data.
    """
    return dedent("""
    Here is the PostgreSQL database schema for the authenticated user's data.

    CRITICAL RULE:
    You MUST add `WHERE user_id = '{current_user_id}'` to EVERY SINGLE query you write.
    Never query data without filtering by user_id!

    TABLE: "Transaction"
    Columns:
    - id (UUID)
    - user_id (UUID)
    - trans_date (DATE)
    - description (TEXT)
    - amount (DECIMAL)
    - category (VARCHAR)
    - enriched_info (TEXT)
    - source_csv_id (UUID)

    TABLE: "TransactionDetail"
    Columns:
    - id (UUID)
    - transaction_id (UUID)
    - item_description (TEXT)
    - item_total_price (DECIMAL)
    """)
 
 
 
 
 
69
 
70
 
71
  @mcp.resource("schema://database/tables")
 
75
 
76
  @mcp.tool()
77
  def query_database(query: str) -> str:
78
+ """
79
+ Execute a raw SQL query against the Postgres database.
80
+ The main table is named "Transaction" (you MUST INCLUDE QUOTES in your SQL!).
81
+ IMPORTANT STRICT SCHEMA:
82
+ - id (UUID)
83
+ - user_id (UUID text)
84
+ - trans_date (DATE)
85
+ - description (TEXT)
86
+ - amount (NUMERIC)
87
+ - category (TEXT)
88
+ - enriched_info (TEXT)
89
 
90
  Args:
91
  query: The SQL SELECT query to execute
 
99
  - 'amount' column: positive values = spending, negative values = payments/refunds
100
 
101
  Example queries:
102
+ - Find Walmart spending: SELECT SUM(amount) FROM "Transaction" WHERE description LIKE '%Walmart%' AND amount > 0;
103
+ - List recent transactions: SELECT trans_date, description, amount, category FROM "Transaction" ORDER BY trans_date DESC LIMIT 5;
104
+ - Spending by category: SELECT category, SUM(amount) FROM "Transaction" WHERE amount > 0 GROUP BY category;
105
  """
 
 
 
106
  # Security: Only allow SELECT queries
107
  query_upper = query.strip().upper()
108
+ if not query_upper.startswith("SELECT") and not query_upper.startswith("WITH"):
109
+ return "Error: Only SELECT queries are allowed"
110
 
111
  # Forbidden operations
112
+ forbidden = ["INSERT", "UPDATE", "DELETE", "DROP", "ALTER", "CREATE", "REPLACE", "TRUNCATE"]
 
 
113
  if any(f" {word} " in f" {query_upper} " for word in forbidden):
114
  return f"Error: Query contains forbidden operation. Only SELECT queries allowed."
115
 
116
+ user_id = get_current_user_id()
117
+ if user_id not in query:
118
+ return f"Error: You forgot to include the security filter (WHERE user_id = '{user_id}') in your query! Try again."
119
+
120
  try:
121
+ conn = get_db_connection()
122
  cursor = conn.cursor()
123
  cursor.execute(query)
124
  results = cursor.fetchall()
125
 
126
  # Get column names to make result more readable
127
+ column_names = [desc[0] for desc in cursor.description] if cursor.description else []
128
 
129
  conn.close()
130
 
 
138
  formatted_results.append(str(row))
139
 
140
  return "\n".join(formatted_results)
141
+ except psycopg2.Error as e:
142
+ return f"Database Error: {str(e)}"
143
 
144
  def get_vector_store():
145
  """Initialize connection to the Qdrant vector store"""
146
  # Initialize Embedding Model using Google AI Studio
147
+ embeddings = GoogleGenerativeAIEmbeddings(model="gemini-embedding-001")
148
 
149
+ # Connect to Qdrant Cloud
150
+ client = QdrantClient(
151
+ url=os.getenv("QDRANT_URL"),
152
+ api_key=os.getenv("QDRANT_API_KEY"),
153
+ )
 
 
 
 
154
 
 
 
 
 
155
  return QdrantVectorStore(
156
  client=client,
157
  collection_name="transactions",
 
171
  top_k: Number of results to return (default 5).
172
  """
173
  try:
174
+ user_id = get_current_user_id()
175
  vector_store = get_vector_store()
176
 
177
+ # Apply strict multi-tenant filtering based on the payload we injected in money_rag.py
178
+ from qdrant_client.http import models
179
+ filter = models.Filter(
180
+ must=[models.FieldCondition(key="metadata.user_id", match=models.MatchValue(value=user_id))]
181
+ )
182
 
183
+ results = vector_store.similarity_search(query, k=top_k, filter=filter)
184
 
185
  if not results:
186
  return "No matching transactions found."
187
 
188
  output = []
189
  for doc in results:
 
190
  amount = doc.metadata.get('amount', 'N/A')
191
  date = doc.metadata.get('transaction_date', 'N/A')
192
  output.append(f"Date: {date} | Match: {doc.page_content} | Amount: {amount}")
 
198
 
199
 
200
  @mcp.tool()
201
+ def generate_interactive_chart(sql_query: str, chart_type: str, x_col: str, y_col: str, title: str, color_col: Optional[str] = None) -> str:
202
  """
203
+ Generate an interactive Plotly chart using SQL data.
204
+ IMPORTANT: The table name MUST be "Transaction" exactly with quotes.
205
+
 
 
 
 
 
206
  Args:
207
+ sql_query: The SQL SELECT query to retrieve the data for the chart from the "Transaction" table.
208
+ - Must use 'user_id' filter.
209
+ chart_type: The type of chart: 'bar', 'line', 'pie', 'scatter'
210
+ x_col: The name of the column to use for the X axis (or labels for pie charts)
211
+ y_col: The name of the column to use for the Y axis (or values for pie charts)
212
+ title: The title of the chart
213
+ color_col: (Optional) Column to use for color grouping
214
+
215
+ Returns:
216
+ A natural language summary confirming chart generation.
217
  """
218
  try:
219
+ user_id = get_current_user_id()
220
+ if user_id not in sql_query:
221
+ return f'{{"error": "You forgot the WHERE user_id = \\"{user_id}\\" security clause!"}}'
222
+
223
+ conn = get_db_connection()
224
  df = pd.read_sql_query(sql_query, conn)
225
  conn.close()
226
  if df.empty:
 
244
  return f'{{"error": "Failed to generate chart: {str(e)}"}}'
245
 
246
 
247
+
 
 
 
 
 
 
 
 
 
 
248
 
249
  if __name__ == "__main__":
250
  # Runs the server over stdio
money_rag.py CHANGED
@@ -1,4 +1,5 @@
1
  import os
 
2
  import uuid
3
  import asyncio
4
  import pandas as pd
@@ -21,16 +22,34 @@ from langgraph.checkpoint.memory import InMemorySaver
21
  from langchain.agents import create_agent
22
  from langchain_community.tools import DuckDuckGoSearchRun
23
  from langchain_mcp_adapters.client import MultiServerMCPClient
 
24
 
25
  # Import specific embeddings
26
  from langchain_google_genai import GoogleGenerativeAIEmbeddings
27
  from langchain_openai import OpenAIEmbeddings
28
 
 
 
 
 
 
29
  class MoneyRAG:
30
- def __init__(self, llm_provider: str, model_name: str, embedding_model_name: str, api_key: str):
31
  self.llm_provider = llm_provider.lower()
32
  self.model_name = model_name
33
  self.embedding_model_name = embedding_model_name
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
  # Set API Keys
36
  if self.llm_provider == "google":
@@ -60,17 +79,18 @@ class MoneyRAG:
60
  self.mcp_client: Optional[MultiServerMCPClient] = None
61
  self.search_tool = DuckDuckGoSearchRun()
62
  self.merchant_cache = {} # Session-based cache for merchant enrichment
 
63
 
64
- async def setup_session(self, csv_paths: List[str]):
65
  """Ingests CSVs and sets up DBs."""
66
- for path in csv_paths:
67
- await self._ingest_csv(path)
 
68
 
69
  self.db = SQLDatabase.from_uri(f"sqlite:///{self.db_path}")
70
  self.vector_store = self._sync_to_qdrant()
71
- await self._init_agent()
72
 
73
- async def _ingest_csv(self, file_path):
74
  df = pd.read_csv(file_path)
75
  headers = df.columns.tolist()
76
  sample_data = df.head(10).to_json()
@@ -108,14 +128,16 @@ class MoneyRAG:
108
  mapping = await chain.ainvoke({"headers": headers, "sample": sample_data, "filename": os.path.basename(file_path)})
109
 
110
  standard_df = pd.DataFrame()
111
- standard_df['id'] = [str(uuid.uuid4()) for _ in range(len(df))]
112
- standard_df['transaction_date'] = pd.to_datetime(df[mapping['date_col']])
 
113
  standard_df['description'] = df[mapping['desc_col']]
 
 
114
 
115
  raw_amounts = pd.to_numeric(df[mapping['amount_col']])
116
  standard_df['amount'] = raw_amounts * -1 if mapping['sign_convention'] == "spending_is_negative" else raw_amounts
117
  standard_df['category'] = df[mapping.get('category_col')] if mapping.get('category_col') else 'Uncategorized'
118
- standard_df['source_file'] = os.path.basename(file_path)
119
 
120
  # --- Async Enrichment Step ---
121
  print(f" ✨ Enriching descriptions for {os.path.basename(file_path)}...")
@@ -143,29 +165,49 @@ class MoneyRAG:
143
  desc_map = dict(zip(unique_descriptions, enrichment_results))
144
  standard_df['enriched_info'] = standard_df['description'].map(desc_map).fillna("")
145
 
146
- conn = sqlite3.connect(self.db_path)
147
- standard_df.to_sql("transactions", conn, if_exists="append", index=False)
148
- conn.close()
 
 
 
 
 
 
 
149
 
150
  def _sync_to_qdrant(self):
151
- client = QdrantClient(path=self.qdrant_path)
 
 
 
 
152
  collection = "transactions"
153
 
154
- conn = sqlite3.connect(self.db_path)
155
- df = pd.read_sql_query("SELECT * FROM transactions", conn)
156
- conn.close()
157
 
158
  # Check for empty dataframe
159
  if df.empty:
160
- raise ValueError("No transactions found in database. Please ingest CSV files first.")
161
 
162
  # Dynamically detect embedding dimension
163
  sample_embedding = self.embeddings.embed_query("test")
164
  embedding_dim = len(sample_embedding)
165
 
166
- client.recreate_collection(
 
 
 
 
 
 
 
 
167
  collection_name=collection,
168
- vectors_config=VectorParams(size=embedding_dim, distance=Distance.COSINE),
 
169
  )
170
 
171
  vs = QdrantVectorStore(client=client, collection_name=collection, embedding=self.embeddings)
@@ -180,90 +222,124 @@ class MoneyRAG:
180
  else:
181
  texts.append(base_text)
182
 
183
- metadatas = df[['id', 'amount', 'category', 'transaction_date']].to_dict('records')
184
- for m in metadatas: m['transaction_date'] = str(m['transaction_date'])
 
 
 
185
 
186
- vs.add_texts(texts=texts, metadatas=metadatas)
 
 
 
 
 
 
 
187
  return vs
188
 
189
- async def _init_agent(self):
190
- # 1. Initialize MCP client with absolute path to server
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
  server_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "mcp_server.py")
192
 
193
- self.mcp_client = MultiServerMCPClient(
194
  {
195
  "money_rag": {
196
  "transport": "stdio",
197
- "command": "python",
198
  "args": [server_path],
199
- "env": os.environ.copy(),
200
  }
201
  }
202
  )
203
 
204
- # 2. Get tools from MCP server
205
- mcp_tools = await self.mcp_client.get_tools()
 
206
 
207
- # 3. Define the Agent with MCP Tools
208
- system_prompt = (
209
- "You are a financial analyst. Use the provided tools to query the database "
210
- "and perform semantic searches. Spending is POSITIVE (>0). "
211
- "Always explain your findings clearly."
212
- "IMPORTANT: Whenever possible and relevant (e.g. when discussing trends, comparing categories, or showing breakdowns), "
213
- "you MUST proactively use the 'generate_interactive_chart' tool to generate visual plots (bar, pie, or line charts) to accompany your analysis. "
214
- "WARNING: You MUST use the actual tool call to generate the chart. DO NOT simply output a json block with chart parameters as your final text answer."
215
- )
216
-
217
- self.agent = create_agent(
218
- model=self.llm,
219
- tools=mcp_tools,
220
- system_prompt=system_prompt,
221
- checkpointer=InMemorySaver(),
222
- )
223
 
224
- async def chat(self, query: str):
225
- config = {"configurable": {"thread_id": "session_1"}}
226
-
227
- # Clear out any previous chart so we don't carry over stale plots
228
- chart_path = os.path.join(self.temp_dir, "latest_chart.json")
229
- if os.path.exists(chart_path):
230
- os.remove(chart_path)
231
-
232
- result = await self.agent.ainvoke(
233
- {"messages": [{"role": "user", "content": query}]},
234
- config,
235
- )
236
-
237
- # Extract content - handle both string and list formats
238
- content = result["messages"][-1].content
239
-
240
- # If content is a list (Gemini format), extract text from blocks
241
- if isinstance(content, list):
242
- text_parts = []
243
- for block in content:
244
- if isinstance(block, dict) and block.get("type") == "text":
245
- text_parts.append(block.get("text", ""))
246
- final_text = "\n".join(text_parts)
247
- else:
248
- final_text = content
249
 
250
- # Check if the tool generated a chart file on disk during this turn
251
- chart_path = os.path.join(self.temp_dir, "latest_chart.json")
252
- if os.path.exists(chart_path):
253
- with open(chart_path, "r") as f:
254
- chart_json = f.read()
255
- final_text += f"\n\n===CHART===\n{chart_json}\n===ENDCHART==="
256
 
257
- return final_text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
258
 
259
  async def cleanup(self):
260
  """Delete temporary session files and close MCP client."""
261
- if self.mcp_client:
262
- try:
263
- await self.mcp_client.close()
264
- except Exception as e:
265
- print(f"Warning: Failed to close MCP client: {e}")
266
-
267
  if os.path.exists(self.temp_dir):
268
  try:
269
  shutil.rmtree(self.temp_dir)
 
1
  import os
2
+ import sys
3
  import uuid
4
  import asyncio
5
  import pandas as pd
 
22
  from langchain.agents import create_agent
23
  from langchain_community.tools import DuckDuckGoSearchRun
24
  from langchain_mcp_adapters.client import MultiServerMCPClient
25
+ from qdrant_client.http import models as qdrant_models
26
 
27
  # Import specific embeddings
28
  from langchain_google_genai import GoogleGenerativeAIEmbeddings
29
  from langchain_openai import OpenAIEmbeddings
30
 
31
+ from supabase import create_client, ClientOptions
32
+
33
+ from dotenv import load_dotenv
34
+ load_dotenv()
35
+
36
  class MoneyRAG:
37
+ def __init__(self, llm_provider: str, model_name: str, embedding_model_name: str, api_key: str, user_id: str, access_token: str = None):
38
  self.llm_provider = llm_provider.lower()
39
  self.model_name = model_name
40
  self.embedding_model_name = embedding_model_name
41
+ self.user_id = user_id
42
+
43
+ # Initialize Supabase Client
44
+ url = os.environ.get("SUPABASE_URL")
45
+ key = os.environ.get("SUPABASE_KEY")
46
+
47
+ # Security: Inject the logged-in user's JWT so RLS policies pass!
48
+ if access_token:
49
+ opts = ClientOptions(headers={"Authorization": f"Bearer {access_token}"})
50
+ self.supabase = create_client(url, key, options=opts)
51
+ else:
52
+ self.supabase = create_client(url, key)
53
 
54
  # Set API Keys
55
  if self.llm_provider == "google":
 
79
  self.mcp_client: Optional[MultiServerMCPClient] = None
80
  self.search_tool = DuckDuckGoSearchRun()
81
  self.merchant_cache = {} # Session-based cache for merchant enrichment
82
+ self.memory = InMemorySaver() # Session-based cache for chat memory
83
 
84
+ async def setup_session(self, csv_files: List[dict]):
85
  """Ingests CSVs and sets up DBs."""
86
+ # csv_files format: [{"path": "/temp/file.csv", "csv_id": "uuid"}, ...]
87
+ for file_info in csv_files:
88
+ await self._ingest_csv(file_info["path"], file_info.get("csv_id"))
89
 
90
  self.db = SQLDatabase.from_uri(f"sqlite:///{self.db_path}")
91
  self.vector_store = self._sync_to_qdrant()
 
92
 
93
+ async def _ingest_csv(self, file_path, csv_id=None):
94
  df = pd.read_csv(file_path)
95
  headers = df.columns.tolist()
96
  sample_data = df.head(10).to_json()
 
128
  mapping = await chain.ainvoke({"headers": headers, "sample": sample_data, "filename": os.path.basename(file_path)})
129
 
130
  standard_df = pd.DataFrame()
131
+ standard_df['trans_date'] = pd.to_datetime(df[mapping['date_col']]).dt.strftime('%Y-%m-%d')
132
+ # Assign user_id AFTER trans_date establishes the DataFrame length, or else it defaults to NaN!
133
+ standard_df['user_id'] = self.user_id
134
  standard_df['description'] = df[mapping['desc_col']]
135
+ if csv_id:
136
+ standard_df['source_csv_id'] = csv_id
137
 
138
  raw_amounts = pd.to_numeric(df[mapping['amount_col']])
139
  standard_df['amount'] = raw_amounts * -1 if mapping['sign_convention'] == "spending_is_negative" else raw_amounts
140
  standard_df['category'] = df[mapping.get('category_col')] if mapping.get('category_col') else 'Uncategorized'
 
141
 
142
  # --- Async Enrichment Step ---
143
  print(f" ✨ Enriching descriptions for {os.path.basename(file_path)}...")
 
165
  desc_map = dict(zip(unique_descriptions, enrichment_results))
166
  standard_df['enriched_info'] = standard_df['description'].map(desc_map).fillna("")
167
 
168
+ # Save to Supabase transactions table instead of local SQLite
169
+ # Use simplejson roundtrip to guarantee all Pandas NaNs, NaTs, and weird floats become strict JSON nulls
170
+ import json
171
+ records = json.loads(standard_df.to_json(orient='records'))
172
+
173
+ batch_size = 100
174
+ for i in range(0, len(records), batch_size):
175
+ batch = records[i:i + batch_size]
176
+ # If insertion fails, it raises an exception so Streamlit surfaces the error
177
+ self.supabase.table("Transaction").insert(batch).execute()
178
 
179
  def _sync_to_qdrant(self):
180
+ # client = QdrantClient(path=self.qdrant_path)
181
+ client = QdrantClient(
182
+ url=os.getenv("QDRANT_URL"),
183
+ api_key=os.getenv("QDRANT_API_KEY"),
184
+ )
185
  collection = "transactions"
186
 
187
+ # Fetch only THIS USER'S transactions from Supabase to sync into VectorDB
188
+ res = self.supabase.table("Transaction").select("*").eq("user_id", self.user_id).execute()
189
+ df = pd.DataFrame(res.data)
190
 
191
  # Check for empty dataframe
192
  if df.empty:
193
+ raise ValueError("No transactions found in database for this user. Please upload files first.")
194
 
195
  # Dynamically detect embedding dimension
196
  sample_embedding = self.embeddings.embed_query("test")
197
  embedding_dim = len(sample_embedding)
198
 
199
+ # Safely create the collection only if it doesn't already exist to preserve multi-tenant pool
200
+ if not client.collection_exists(collection):
201
+ client.create_collection(
202
+ collection_name=collection,
203
+ vectors_config=qdrant_models.VectorParams(size=embedding_dim, distance=qdrant_models.Distance.COSINE),
204
+ )
205
+
206
+ # Security: Create a strict Payload Index on the user_id field so we can filter by it securely!
207
+ client.create_payload_index(
208
  collection_name=collection,
209
+ field_name="metadata.user_id",
210
+ field_schema=qdrant_models.PayloadSchemaType.KEYWORD,
211
  )
212
 
213
  vs = QdrantVectorStore(client=client, collection_name=collection, embedding=self.embeddings)
 
222
  else:
223
  texts.append(base_text)
224
 
225
+ # Inject critical user_id payload to Qdrant so we can filter on it during retrieval
226
+ metadatas = df[['id', 'amount', 'category', 'trans_date']].copy()
227
+ if 'source_csv_id' in df.columns:
228
+ metadatas['source_csv_id'] = df['source_csv_id']
229
+ metadatas = metadatas.to_dict('records')
230
 
231
+ vector_ids = []
232
+ for m in metadatas:
233
+ vector_ids.append(str(m['id'])) # Keep original Postgres UUID as Vector ID to prevent duplication
234
+ m['user_id'] = self.user_id # Secure payload identifier
235
+ m['transaction_date'] = str(m['trans_date']) # Rename for agent consistency
236
+ del m['trans_date']
237
+
238
+ vs.add_texts(texts=texts, metadatas=metadatas, ids=vector_ids)
239
  return vs
240
 
241
+ async def delete_file(self, csv_id: str):
242
+ """Force delete a file and all its transactions from Postgres and Qdrant."""
243
+ try:
244
+ # 1. Delete from Postgres (Transactions cascade automatically if foreign keyed... but we'll manually ensure they wipe just in case)
245
+ self.supabase.table("Transaction").delete().eq("source_csv_id", csv_id).execute()
246
+ self.supabase.table("CSVFile").delete().eq("id", csv_id).execute()
247
+
248
+ # 2. Delete from Qdrant via payload filter
249
+ client = QdrantClient(url=os.getenv("QDRANT_URL"), api_key=os.getenv("QDRANT_API_KEY"))
250
+ client.delete(
251
+ collection_name="transactions",
252
+ points_selector=qdrant_models.Filter(
253
+ must=[
254
+ qdrant_models.FieldCondition(
255
+ key="metadata.source_csv_id",
256
+ match=qdrant_models.MatchValue(value=csv_id)
257
+ )
258
+ ]
259
+ )
260
+ )
261
+ except Exception as e:
262
+ print(f"Error purging file data: {e}")
263
+
264
    async def chat(self, query: str):
        """Run one chat turn against the agent and return the final text.

        Spawns a fresh MCP tool subprocess for this turn (with this user's id
        injected into its environment so every tool is tenant-scoped), builds
        a LangGraph agent over the MCP tools, invokes it, and tears the
        subprocess down again in ``finally``. If a chart file was produced by
        a tool during the turn, its JSON is appended between ===CHART===
        markers for the UI to extract.
        """
        # 1. Initialize MCP client dynamically to guarantee fresh bindings
        server_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "mcp_server.py")

        # sys.executable (not bare "python") so the subprocess uses the same
        # interpreter/venv as the parent. CURRENT_USER_ID is how the MCP
        # server learns which tenant's data it may touch.
        mcp_client = MultiServerMCPClient(
            {
                "money_rag": {
                    "transport": "stdio",
                    "command": sys.executable,
                    "args": [server_path],
                    "env": {**os.environ.copy(), "CURRENT_USER_ID": self.user_id},
                }
            }
        )

        try:
            # 2. Extract tools from the safely established subprocess
            mcp_tools = await mcp_client.get_tools()

            # 3. Create the LangGraph agent for this turn, preserving historical memory cache
            system_prompt = (
                "You are a financial analyst. Use the provided tools to query the database "
                "and perform semantic searches. Spending is POSITIVE (>0). "
                "Always explain your findings clearly."
                "IMPORTANT: Whenever possible and relevant (e.g. when discussing trends, comparing categories, or showing breakdowns), "
                "you MUST proactively use the 'generate_interactive_chart' tool to generate visual plots (bar, pie, or line charts) to accompany your analysis. "
                "WARNING: You MUST use the actual tool call to generate the chart. DO NOT simply output a json block with chart parameters as your final text answer."
            )

            # self.memory (InMemorySaver) persists across turns, so history
            # survives even though the agent object is rebuilt each call.
            agent = create_agent(
                model=self.llm,
                tools=mcp_tools,
                system_prompt=system_prompt,
                checkpointer=self.memory,
            )

            # Fixed thread id: one conversation thread per MoneyRAG session.
            config = {"configurable": {"thread_id": "session_1"}}

            # Clear out any previous chart so we don't carry over stale plots
            chart_path = os.path.join(self.temp_dir, "latest_chart.json")
            if os.path.exists(chart_path):
                os.remove(chart_path)

            # 4. Invoke the agent against the LLM, triggering our nested Tools locally
            result = await agent.ainvoke(
                {"messages": [{"role": "user", "content": query}]},
                config,
            )

            # Extract content - handle both string and list formats
            content = result["messages"][-1].content

            # If content is a list (Gemini format), extract text from blocks
            if isinstance(content, list):
                text_parts = []
                for block in content:
                    if isinstance(block, dict) and block.get("type") == "text":
                        text_parts.append(block.get("text", ""))
                final_text = "\n".join(text_parts)
            else:
                final_text = content

            # Check for generated chart; the chart tool writes this file as a
            # side effect (there is no structured channel back from the tool).
            if os.path.exists(chart_path):
                with open(chart_path, "r") as f:
                    chart_json = f.read()
                return f"{final_text}\n\n===CHART===\n{chart_json}\n===ENDCHART==="

            return final_text

        finally:
            # 5. Destroy the subprocess safely so we don't leak FastMCP zombies across Streamlit reruns
            try:
                await mcp_client.close()
            except Exception as close_e:
                print(f"Warning on closing MCP Client: {close_e}")
340
 
341
  async def cleanup(self):
342
  """Delete temporary session files and close MCP client."""
 
 
 
 
 
 
343
  if os.path.exists(self.temp_dir):
344
  try:
345
  shutil.rmtree(self.temp_dir)
requirements.txt CHANGED
@@ -41,3 +41,9 @@ tenacity>=9.1.2
41
 
42
  streamlit>=1.53.0
43
  ddgs>=9.10.0
 
 
 
 
 
 
 
41
 
42
  streamlit>=1.53.0
43
  ddgs>=9.10.0
44
+
45
+ supabase>=2.28.0
46
+ plotly>=6.5.2
47
+
48
+ psycopg2-binary>=2.9.11
49
+ extra-streamlit-components