import streamlit as st
import pandas as pd
import numpy as np
import os
import requests
import json
import re

# — Page config
st.set_page_config(page_title="CSV-Backed AI Agent", layout="wide")

# — Title & image
st.title("CSV-Backed AI Agent")
st.image("./nadi-lok-image.png")

# — Load API key (injected via environment / Streamlit Secrets)
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    st.error("❌ OPENAI_API_KEY not set in Settings → Secrets.")
    st.stop()

HEADERS = {
    "Authorization": f"Bearer {OPENAI_API_KEY}",
    "Content-Type": "application/json",
}


def _post_openai(url: str, payload: dict) -> dict:
    """POST *payload* to an OpenAI endpoint and return the decoded JSON body.

    Network failures and non-2xx responses are surfaced as a Streamlit error
    and halt the script run, instead of leaking a raw traceback into the UI.
    """
    try:
        resp = requests.post(url, headers=HEADERS, json=payload, timeout=60)
        resp.raise_for_status()
        return resp.json()
    except requests.RequestException as exc:
        st.error(f"OpenAI API request failed: {exc}")
        st.stop()


def _extract_json(answer: str):
    """Best-effort extraction of a JSON payload from a model reply.

    Strips Markdown code fences, then tries to parse the outermost JSON
    object OR array (the model is told to answer "strictly as JSON", but
    either container shape is valid JSON). Trailing commas — a common
    LLM artifact — are removed before parsing. Returns the parsed value,
    or None when nothing parseable is found.
    """
    txt = re.sub(r"```(?:json)?", "", answer).strip()
    for open_ch, close_ch in (("{", "}"), ("[", "]")):
        start = txt.find(open_ch)
        end = txt.rfind(close_ch) + 1
        # end > start (not end > 0) so a stray closer BEFORE the opener
        # doesn't produce a bogus fragment.
        if start >= 0 and end > start:
            frag = re.sub(r",\s*([}\]])", r"\1", txt[start:end])
            try:
                return json.loads(frag)
            except json.JSONDecodeError:
                continue
    return None


# — Sidebar: CSV upload & preview
st.sidebar.header("Upload CSV File")
uploaded_file = st.sidebar.file_uploader("Choose a CSV file", type="csv")

if uploaded_file:
    try:
        df = pd.read_csv(uploaded_file)
        st.sidebar.success("File uploaded successfully!")
        st.sidebar.write("Preview of the uploaded file:")
        st.sidebar.dataframe(df.head())
    except Exception as e:
        st.sidebar.error(f"Error reading file: {e}")
        df = None
else:
    df = None

if df is not None:
    st.markdown(f"**Loaded CSV:** {df.shape[0]} rows × {df.shape[1]} columns")

    @st.cache_data(show_spinner=False)
    def build_row_embeddings(df: pd.DataFrame):
        """Embed every CSV row and return (embedding matrix, row JSON texts).

        Each row is serialized to compact JSON and sent to the embeddings
        endpoint in batches of 100 to stay under request-size limits.
        Cached by Streamlit, keyed on the DataFrame contents.
        """
        texts = df.apply(lambda r: r.to_json(), axis=1).tolist()
        embeddings = []
        for i in range(0, len(texts), 100):
            batch = texts[i : i + 100]
            data = _post_openai(
                "https://api.openai.com/v1/embeddings",
                {"model": "text-embedding-ada-002", "input": batch},
            )["data"]
            embeddings.extend(d["embedding"] for d in data)
        return np.array(embeddings), texts

    embeddings, row_texts = build_row_embeddings(df)

# — Prompt input
prompt = st.text_area(
    "Enter your prompt for the agent",
    placeholder="e.g. \nWhich products have price > 100?",
    height=150,
)

# — Run Agent
if st.button("Run Agent"):
    if df is None:
        st.error("Please upload a CSV file first.")
    elif not prompt.strip():
        st.error("Please enter a prompt.")
    else:
        # 1) Embed the prompt with the same model used for the rows
        q_data = _post_openai(
            "https://api.openai.com/v1/embeddings",
            {"model": "text-embedding-ada-002", "input": [prompt]},
        )
        q_emb = np.array(q_data["data"][0]["embedding"])

        # 2) Cosine similarity between the prompt and every row
        #    (epsilon guards against division by a zero norm)
        norms = np.linalg.norm(embeddings, axis=1)
        q_norm = np.linalg.norm(q_emb)
        sims = embeddings.dot(q_emb) / (norms * q_norm + 1e-8)

        # 3) Select top-K most relevant rows (K capped by row count)
        K = min(5, len(sims))
        top_idxs = sims.argsort()[-K:][::-1]
        relevant_rows = [row_texts[i] for i in top_idxs]

        # 4) Build the chat messages: instructions, retrieved context, query
        system_msg = {
            "role": "system",
            "content": (
                "You are an AI agent that reads the provided CSV rows and answers the user's query. "
                "Return your answer strictly as JSON (no extra explanation)."
            ),
        }
        memory_msg = {
            "role": "system",
            "content": "Relevant CSV rows:\n" + "\n".join(relevant_rows) + "\n",
        }
        user_msg = {"role": "user", "content": prompt}

        # 5) Call the Chat API (temperature 0 for deterministic answers)
        chat_data = _post_openai(
            "https://api.openai.com/v1/chat/completions",
            {
                "model": "gpt-3.5-turbo",
                "messages": [system_msg, memory_msg, user_msg],
                "temperature": 0,
                "max_tokens": 1500,
            },
        )
        answer = chat_data["choices"][0]["message"]["content"]

        # 6) Extract the JSON payload (object or array) from the reply
        parsed = _extract_json(answer)

        # 7) Display results — fall back to raw text if parsing failed
        if parsed is not None:
            st.subheader("✅ JSON Output")
            st.json(parsed)
        else:
            st.subheader("🔍 Raw Output")
            st.text(answer)