# Spaces: Sleeping
# (Hugging Face Spaces status banner captured when this source was scraped;
#  kept as a comment so the file remains valid Python.)
import streamlit as st
import pandas as pd
import numpy as np
import os
import requests
import json
import re

# — Page config
st.set_page_config(page_title="CSV-Backed AI Agent", layout="wide")

# — Title & image
st.title("CSV-Backed AI Agent")
st.image("./nadi-lok-image.png")

# — Load API key (expected via environment / Spaces "Settings → Secrets")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    # Without a key no API call can succeed, so halt the script run here.
    st.error("❌ OPENAI_API_KEY not set in Settings → Secrets.")
    st.stop()

# Shared headers for all OpenAI REST calls below.
HEADERS = {
    "Authorization": f"Bearer {OPENAI_API_KEY}",
    "Content-Type": "application/json",
}
# — Sidebar: CSV upload & preview.
# df is the single source of truth for "a CSV is loaded": it is a DataFrame
# on success and None otherwise, which the rest of the script checks.
st.sidebar.header("Upload CSV File")
uploaded_file = st.sidebar.file_uploader("Choose a CSV file", type="csv")
if uploaded_file:
    try:
        df = pd.read_csv(uploaded_file)
        st.sidebar.success("File uploaded successfully!")
        st.sidebar.write("Preview of the uploaded file:")
        st.sidebar.dataframe(df.head())
    except Exception as e:
        # Surface parse failures (bad encoding, malformed CSV, …) in the
        # sidebar instead of crashing the app; fall back to "no data".
        st.sidebar.error(f"Error reading file: {e}")
        df = None
else:
    df = None
# Show a one-line shape summary once a CSV is successfully loaded.
if df is not None:
    st.markdown(f"**Loaded CSV:** {df.shape[0]} rows × {df.shape[1]} columns")
def build_row_embeddings(
    df: pd.DataFrame,
    batch_size: int = 100,
    model: str = "text-embedding-ada-002",
):
    """Embed every row of *df* via the OpenAI embeddings endpoint.

    Each row is serialized to a compact JSON string, and the strings are
    sent in batches of *batch_size* to keep individual requests small.

    Args:
        df: the loaded CSV as a DataFrame (one embedding per row).
        batch_size: number of row strings per embeddings request.
        model: OpenAI embedding model name.

    Returns:
        (embeddings, texts): an (n_rows, dim) float ndarray of row
        embeddings and the list of row JSON strings, index-aligned.

    Raises:
        requests.HTTPError: if any batch request fails (raise_for_status).
    """
    # 1) Convert each row to a compact JSON string
    texts = df.apply(lambda r: r.to_json(), axis=1).tolist()
    # 2) Batch-call the embeddings endpoint; order of the returned data
    #    matches the order of the input batch, so extending preserves
    #    row alignment.
    embeddings = []
    for i in range(0, len(texts), batch_size):
        batch = texts[i : i + batch_size]
        resp = requests.post(
            "https://api.openai.com/v1/embeddings",
            headers=HEADERS,
            json={"model": model, "input": batch},
            timeout=60,
        )
        resp.raise_for_status()
        data = resp.json()["data"]
        embeddings.extend(d["embedding"] for d in data)
    return np.array(embeddings), texts
| embeddings, row_texts = build_row_embeddings(df) | |
# — Prompt input
prompt = st.text_area(
    "Enter your prompt for the agent",
    placeholder="e.g. Which products have price > 100?",
    height=150,
)
# — Run Agent
def _extract_json(answer: str):
    """Best-effort extraction of a JSON object from a model reply.

    Strips Markdown code fences, isolates the outermost ``{…}`` span, and
    removes trailing commas before closing braces/brackets (a common model
    formatting mistake). Returns the parsed object, or None when no valid
    JSON object can be recovered.
    """
    txt = re.sub(r"```(?:json)?", "", answer).strip()
    start = txt.find("{")
    end = txt.rfind("}") + 1
    if start < 0 or end <= start:
        return None
    frag = re.sub(r",\s*([}\]])", r"\1", txt[start:end])
    try:
        return json.loads(frag)
    except json.JSONDecodeError:
        return None


if st.button("Run Agent"):
    if df is None:
        st.error("Please upload a CSV file first.")
    elif not prompt.strip():
        st.error("Please enter a prompt.")
    else:
        try:
            # 1) Embed the prompt
            q_resp = requests.post(
                "https://api.openai.com/v1/embeddings",
                headers=HEADERS,
                json={"model": "text-embedding-ada-002", "input": [prompt]},
                timeout=60,
            )
            q_resp.raise_for_status()
            q_emb = np.array(q_resp.json()["data"][0]["embedding"])

            # 2) Compute cosine similarities (epsilon guards zero norms)
            norms = np.linalg.norm(embeddings, axis=1)
            q_norm = np.linalg.norm(q_emb)
            sims = embeddings.dot(q_emb) / (norms * q_norm + 1e-8)

            # 3) Select top-K relevant rows (highest similarity first)
            K = min(5, len(sims))
            top_idxs = sims.argsort()[-K:][::-1]
            relevant_rows = [row_texts[i] for i in top_idxs]

            # 4) Build the chat messages: instructions, retrieved rows as
            #    context, then the user's query.
            system_msg = {
                "role": "system",
                "content": (
                    "You are an AI agent that reads the provided CSV rows and answers the user's query. "
                    "Return your answer strictly as JSON (no extra explanation)."
                ),
            }
            memory_msg = {
                "role": "system",
                "content": "Relevant CSV rows:\n" + "\n".join(relevant_rows) + "\n<end of rows>",
            }
            user_msg = {"role": "user", "content": prompt}

            # 5) Call the Chat API (temperature 0 for determinism)
            chat_resp = requests.post(
                "https://api.openai.com/v1/chat/completions",
                headers=HEADERS,
                json={
                    "model": "gpt-3.5-turbo",
                    "messages": [system_msg, memory_msg, user_msg],
                    "temperature": 0,
                    "max_tokens": 1500,
                },
                timeout=60,
            )
            chat_resp.raise_for_status()
            answer = chat_resp.json()["choices"][0]["message"]["content"]
        except requests.RequestException as e:
            # Show API/network failures in the UI instead of a raw traceback.
            st.error(f"OpenAI API request failed: {e}")
        else:
            # 6) Extract and display the JSON payload, falling back to the
            #    raw reply when the model ignored the JSON-only instruction.
            parsed = _extract_json(answer)
            if parsed is not None:
                st.subheader("✅ JSON Output")
                st.json(parsed)
            else:
                st.subheader("🔍 Raw Output")
                st.text(answer)