# AI_AGENT_TEST / app.py
# Hugging Face Space by Seth0330 — commit b66e512 (verified): "Update app.py"
import streamlit as st
import pandas as pd
import numpy as np
import os
import requests
import json
import re
# --- Page configuration & branding -------------------------------------------
st.set_page_config(page_title="CSV-Backed AI Agent", layout="wide")

st.title("CSV-Backed AI Agent")
st.image("./nadi-lok-image.png")

# --- OpenAI credentials -------------------------------------------------------
# The key must arrive via the environment (e.g. Spaces Settings → Secrets);
# without it no API call can succeed, so stop the app immediately.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    st.error("❌ OPENAI_API_KEY not set in Settings → Secrets.")
    st.stop()

# Shared headers for every OpenAI REST request below.
HEADERS = {
    "Authorization": f"Bearer {OPENAI_API_KEY}",
    "Content-Type": "application/json",
}
# --- Sidebar: CSV upload & preview -------------------------------------------
st.sidebar.header("Upload CSV File")
uploaded_file = st.sidebar.file_uploader("Choose a CSV file", type="csv")

# df stays None until a CSV has been uploaded and parsed successfully.
df = None
if uploaded_file:
    try:
        df = pd.read_csv(uploaded_file)
        st.sidebar.success("File uploaded successfully!")
        st.sidebar.write("Preview of the uploaded file:")
        st.sidebar.dataframe(df.head())
    except Exception as e:
        # Malformed CSVs are reported in the sidebar; the app keeps running.
        st.sidebar.error(f"Error reading file: {e}")
        df = None
if df is not None:
    st.markdown(f"**Loaded CSV:** {df.shape[0]} rows × {df.shape[1]} columns")


@st.cache_data(show_spinner=False)
def build_row_embeddings(df: pd.DataFrame, batch_size: int = 100):
    """Embed every row of *df* with OpenAI's text-embedding-ada-002 model.

    Each row is serialized to a compact JSON string (so column names travel
    with the values), the strings are sent to the embeddings endpoint in
    batches of *batch_size*, and the vectors are returned as a single
    (n_rows, dim) ndarray alongside the per-row texts.

    Args:
        df: the uploaded CSV as a DataFrame.
        batch_size: rows per embeddings request (the API limits inputs
            per call; 100 keeps requests well under that limit).

    Returns:
        (embeddings, texts): ndarray of row vectors and the matching
        list of JSON row strings.

    Raises:
        requests.HTTPError: on a non-2xx response from the API.
    """
    # 1) One JSON document per row.
    texts = df.apply(lambda r: r.to_json(), axis=1).tolist()

    # 2) Batch-call the embeddings endpoint.
    embeddings = []
    for i in range(0, len(texts), batch_size):
        batch = texts[i : i + batch_size]
        resp = requests.post(
            "https://api.openai.com/v1/embeddings",
            headers=HEADERS,
            json={"model": "text-embedding-ada-002", "input": batch},
            timeout=60,
        )
        resp.raise_for_status()
        data = resp.json()["data"]
        embeddings.extend(d["embedding"] for d in data)
    return np.array(embeddings), texts


# Only embed once a CSV is actually loaded — calling build_row_embeddings(None)
# would raise AttributeError on df.apply and crash the app on first render.
if df is not None:
    embeddings, row_texts = build_row_embeddings(df)
else:
    embeddings, row_texts = None, None
# --- Prompt input -------------------------------------------------------------
prompt = st.text_area(
    "Enter your prompt for the agent",
    placeholder="e.g. Which products have price > 100?",
    height=150,
)


def _extract_json(answer: str):
    """Best-effort extraction of a JSON object from a model reply.

    Strips Markdown code fences, isolates the outermost ``{...}`` span,
    removes trailing commas before closers (a common model slip that
    ``json.loads`` rejects), and returns the parsed object — or ``None``
    when no valid JSON object can be recovered.
    """
    txt = re.sub(r"```(?:json)?", "", answer).strip()
    start = txt.find("{")
    end = txt.rfind("}") + 1
    # end > start (not merely > 0) guards against a '}' preceding the only '{'.
    if start < 0 or end <= start:
        return None
    frag = re.sub(r",\s*([}\]])", r"\1", txt[start:end])
    try:
        return json.loads(frag)
    except json.JSONDecodeError:
        return None


# --- Run Agent ----------------------------------------------------------------
if st.button("Run Agent"):
    if df is None:
        st.error("Please upload a CSV file first.")
    elif not prompt.strip():
        st.error("Please enter a prompt.")
    else:
        try:
            # 1) Embed the prompt with the same model used for the rows so
            #    cosine similarity is meaningful.
            q_resp = requests.post(
                "https://api.openai.com/v1/embeddings",
                headers=HEADERS,
                json={"model": "text-embedding-ada-002", "input": [prompt]},
                timeout=60,
            )
            q_resp.raise_for_status()
            q_emb = np.array(q_resp.json()["data"][0]["embedding"])

            # 2) Cosine similarity between the prompt and every row
            #    (epsilon avoids division by zero on degenerate vectors).
            norms = np.linalg.norm(embeddings, axis=1)
            q_norm = np.linalg.norm(q_emb)
            sims = embeddings.dot(q_emb) / (norms * q_norm + 1e-8)

            # 3) The top-K most similar rows become the model's context.
            K = min(5, len(sims))
            top_idxs = sims.argsort()[-K:][::-1]
            relevant_rows = [row_texts[i] for i in top_idxs]

            # 4) Build the chat messages: instructions, retrieved rows, query.
            system_msg = {
                "role": "system",
                "content": (
                    "You are an AI agent that reads the provided CSV rows and answers the user's query. "
                    "Return your answer strictly as JSON (no extra explanation)."
                ),
            }
            memory_msg = {
                "role": "system",
                "content": "Relevant CSV rows:\n" + "\n".join(relevant_rows) + "\n<end of rows>",
            }
            user_msg = {"role": "user", "content": prompt}

            # 5) Call the Chat API (temperature 0 for deterministic answers).
            chat_resp = requests.post(
                "https://api.openai.com/v1/chat/completions",
                headers=HEADERS,
                json={
                    "model": "gpt-3.5-turbo",
                    "messages": [system_msg, memory_msg, user_msg],
                    "temperature": 0,
                    "max_tokens": 1500,
                },
                timeout=60,
            )
            chat_resp.raise_for_status()
            answer = chat_resp.json()["choices"][0]["message"]["content"]
        except requests.RequestException as e:
            # Network failures and 4xx/5xx responses previously crashed the
            # script run with a raw traceback; surface them to the user instead.
            st.error(f"OpenAI API request failed: {e}")
        else:
            # 6) Prefer structured output; fall back to the raw reply.
            parsed = _extract_json(answer)
            if parsed is not None:
                st.subheader("✅ JSON Output")
                st.json(parsed)
            else:
                st.subheader("🔍 Raw Output")
                st.text(answer)