Seth0330 committed on
Commit
b66e512
·
verified ·
1 Parent(s): 8bf44c2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -30
app.py CHANGED
@@ -1,8 +1,8 @@
1
  import streamlit as st
2
  import pandas as pd
3
- import os
4
- import openai
5
  import numpy as np
 
 
6
  import json
7
  import re
8
 
@@ -13,13 +13,18 @@ st.set_page_config(page_title="CSV-Backed AI Agent", layout="wide")
13
  st.title("CSV-Backed AI Agent")
14
  st.image("./nadi-lok-image.png")
15
 
16
- # — Load your OpenAI key
17
- openai.api_key = os.getenv("OPENAI_API_KEY")
18
- if not openai.api_key:
19
  st.error("❌ OPENAI_API_KEY not set in Settings → Secrets.")
20
  st.stop()
21
 
22
- # Sidebar CSV upload
 
 
 
 
 
23
  st.sidebar.header("Upload CSV File")
24
  uploaded_file = st.sidebar.file_uploader("Choose a CSV file", type="csv")
25
 
@@ -27,7 +32,7 @@ if uploaded_file:
27
  try:
28
  df = pd.read_csv(uploaded_file)
29
  st.sidebar.success("File uploaded successfully!")
30
- st.sidebar.write("Preview:")
31
  st.sidebar.dataframe(df.head())
32
  except Exception as e:
33
  st.sidebar.error(f"Error reading file: {e}")
@@ -40,17 +45,24 @@ if df is not None:
40
 
41
  @st.cache_data(show_spinner=False)
42
  def build_row_embeddings(df: pd.DataFrame):
43
- # Serialize each row to a compact JSON string
44
- texts = df.apply(lambda row: row.to_json(), axis=1).tolist()
45
 
46
- # Batch‐call embeddings
47
- all_embs = []
48
  for i in range(0, len(texts), 100):
49
  batch = texts[i : i + 100]
50
- resp = openai.Embedding.create(model="text-embedding-ada-002", input=batch)
51
- all_embs.extend([d["embedding"] for d in resp["data"]])
52
-
53
- return np.array(all_embs), texts
 
 
 
 
 
 
 
54
 
55
  embeddings, row_texts = build_row_embeddings(df)
56
 
@@ -69,18 +81,26 @@ if st.button("Run Agent"):
69
  st.error("Please enter a prompt.")
70
  else:
71
  # 1) Embed the prompt
72
- q_resp = openai.Embedding.create(model="text-embedding-ada-002", input=[prompt])
73
- q_emb = np.array(q_resp["data"][0]["embedding"])
 
 
 
 
 
 
 
74
  # 2) Compute cosine similarities
75
- row_norms = np.linalg.norm(embeddings, axis=1)
76
  q_norm = np.linalg.norm(q_emb)
77
- sims = embeddings.dot(q_emb) / (row_norms * q_norm + 1e-8)
78
- # 3) Pick top K rows (e.g. 5)
 
79
  K = min(5, len(sims))
80
  top_idxs = sims.argsort()[-K:][::-1]
81
  relevant_rows = [row_texts[i] for i in top_idxs]
82
 
83
- # 4) Build the messages
84
  system_msg = {
85
  "role": "system",
86
  "content": (
@@ -94,16 +114,22 @@ if st.button("Run Agent"):
94
  }
95
  user_msg = {"role": "user", "content": prompt}
96
 
97
- # 5) Call ChatCompletion
98
- chat = openai.ChatCompletion.create(
99
- model="gpt-3.5-turbo",
100
- messages=[system_msg, memory_msg, user_msg],
101
- temperature=0,
102
- max_tokens=1500,
 
 
 
 
 
103
  )
104
- answer = chat.choices[0].message.content
 
105
 
106
- # 6) Extract JSON
107
  txt = re.sub(r"```(?:json)?", "", answer).strip()
108
  start = txt.find("{")
109
  end = txt.rfind("}") + 1
@@ -115,7 +141,7 @@ if st.button("Run Agent"):
115
  except json.JSONDecodeError:
116
  parsed = None
117
 
118
- # 7) Display
119
  if parsed is not None:
120
  st.subheader("✅ JSON Output")
121
  st.json(parsed)
 
1
  import streamlit as st
2
  import pandas as pd
 
 
3
  import numpy as np
4
+ import os
5
+ import requests
6
  import json
7
  import re
8
 
 
13
  st.title("CSV-Backed AI Agent")
14
  st.image("./nadi-lok-image.png")
15
 
16
+ # — Load API key
17
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
18
+ if not OPENAI_API_KEY:
19
  st.error("❌ OPENAI_API_KEY not set in Settings → Secrets.")
20
  st.stop()
21
 
22
+ HEADERS = {
23
+ "Authorization": f"Bearer {OPENAI_API_KEY}",
24
+ "Content-Type": "application/json",
25
+ }
26
+
27
+ # — Sidebar: CSV upload & preview
28
  st.sidebar.header("Upload CSV File")
29
  uploaded_file = st.sidebar.file_uploader("Choose a CSV file", type="csv")
30
 
 
32
  try:
33
  df = pd.read_csv(uploaded_file)
34
  st.sidebar.success("File uploaded successfully!")
35
+ st.sidebar.write("Preview of the uploaded file:")
36
  st.sidebar.dataframe(df.head())
37
  except Exception as e:
38
  st.sidebar.error(f"Error reading file: {e}")
 
45
 
46
  @st.cache_data(show_spinner=False)
47
  def build_row_embeddings(df: pd.DataFrame):
48
+ # 1) Convert each row to a compact JSON string
49
+ texts = df.apply(lambda r: r.to_json(), axis=1).tolist()
50
 
51
+ # 2) Batch‐call the embeddings endpoint
52
+ embeddings = []
53
  for i in range(0, len(texts), 100):
54
  batch = texts[i : i + 100]
55
+ resp = requests.post(
56
+ "https://api.openai.com/v1/embeddings",
57
+ headers=HEADERS,
58
+ json={"model": "text-embedding-ada-002", "input": batch},
59
+ timeout=60,
60
+ )
61
+ resp.raise_for_status()
62
+ data = resp.json()["data"]
63
+ embeddings.extend(d["embedding"] for d in data)
64
+
65
+ return np.array(embeddings), texts
66
 
67
  embeddings, row_texts = build_row_embeddings(df)
68
 
 
81
  st.error("Please enter a prompt.")
82
  else:
83
  # 1) Embed the prompt
84
+ q_resp = requests.post(
85
+ "https://api.openai.com/v1/embeddings",
86
+ headers=HEADERS,
87
+ json={"model": "text-embedding-ada-002", "input": [prompt]},
88
+ timeout=60,
89
+ )
90
+ q_resp.raise_for_status()
91
+ q_emb = np.array(q_resp.json()["data"][0]["embedding"])
92
+
93
  # 2) Compute cosine similarities
94
+ norms = np.linalg.norm(embeddings, axis=1)
95
  q_norm = np.linalg.norm(q_emb)
96
+ sims = embeddings.dot(q_emb) / (norms * q_norm + 1e-8)
97
+
98
+ # 3) Select top-K relevant rows
99
  K = min(5, len(sims))
100
  top_idxs = sims.argsort()[-K:][::-1]
101
  relevant_rows = [row_texts[i] for i in top_idxs]
102
 
103
+ # 4) Build the chat messages
104
  system_msg = {
105
  "role": "system",
106
  "content": (
 
114
  }
115
  user_msg = {"role": "user", "content": prompt}
116
 
117
+ # 5) Call the Chat API
118
+ chat_resp = requests.post(
119
+ "https://api.openai.com/v1/chat/completions",
120
+ headers=HEADERS,
121
+ json={
122
+ "model": "gpt-3.5-turbo",
123
+ "messages": [system_msg, memory_msg, user_msg],
124
+ "temperature": 0,
125
+ "max_tokens": 1500,
126
+ },
127
+ timeout=60,
128
  )
129
+ chat_resp.raise_for_status()
130
+ answer = chat_resp.json()["choices"][0]["message"]["content"]
131
 
132
+ # 6) Extract the JSON payload
133
  txt = re.sub(r"```(?:json)?", "", answer).strip()
134
  start = txt.find("{")
135
  end = txt.rfind("}") + 1
 
141
  except json.JSONDecodeError:
142
  parsed = None
143
 
144
+ # 7) Display results
145
  if parsed is not None:
146
  st.subheader("✅ JSON Output")
147
  st.json(parsed)