Seth0330 committed on
Commit
46c8eb0
·
verified ·
1 Parent(s): 657f503

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +249 -75
app.py CHANGED
@@ -1,103 +1,277 @@
1
  import streamlit as st
 
2
  import json
3
- from langchain.chat_models import ChatOpenAI
4
- from langchain.agents import initialize_agent, Tool
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
 
6
  def normalize(s):
7
  return ' '.join(str(s).lower().replace("_", " ").replace("-", " ").replace(".", " ").split())
8
 
9
  def is_fuzzy_match(a, b, threshold=0.7):
10
- from difflib import SequenceMatcher
11
- ratio = SequenceMatcher(None, a, b).ratio()
12
  return ratio >= threshold or a in b or b in a
13
 
14
- def recursive_fuzzy_value_search(target_value):
 
15
  matches = []
16
- norm_target = normalize(target_value)
17
  for file_name, data in st.session_state.json_data.items():
18
- def _search(obj, path):
19
  if isinstance(obj, dict):
20
  for k, v in obj.items():
21
- if isinstance(v, (str, int, float, bool)) and is_fuzzy_match(norm_target, normalize(v)):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  matches.append({
23
  "file": file_name,
24
  "key": k,
25
- "path": ".".join(path + [k]),
26
- "value": v
27
  })
28
- _search(v, path + [k])
29
  elif isinstance(obj, list):
30
- for idx, item in enumerate(obj):
31
- _search(item, path + [f"[{idx}]"])
32
- _search(data, [])
33
  return matches
34
 
35
- # LangChain Tool for LLM
36
- def json_search_tool(query: str) -> str:
37
- """Search all uploaded JSON files for any value (fuzzy match); returns matching fields and values."""
38
- results = recursive_fuzzy_value_search(query)
39
- if not results:
40
- return f"No match for '{query}'."
41
- answer = []
42
- for res in results:
43
- answer.append(f"{res['file']} | {res['key']} ({res['path']}): {res['value']}")
44
- return "\n".join(answer)
45
-
46
- # Streamlit UI
47
- if "json_data" not in st.session_state:
48
- st.session_state.json_data = {}
49
- if "chat_history" not in st.session_state:
50
- st.session_state.chat_history = []
51
-
52
- st.set_page_config(page_title="Chat with Your JSONs!", layout="wide")
53
- st.title("Chat with Your JSON Files (powered by GPT + instant JSON search)")
54
 
55
- uploaded_files = st.sidebar.file_uploader(
56
- "Choose one or more JSON files", type="json", accept_multiple_files=True
57
- )
58
- if uploaded_files:
59
- st.session_state.json_data.clear()
60
- for f in uploaded_files:
61
- content = json.load(f)
62
- st.session_state.json_data[f.name] = content
63
- st.sidebar.success("All JSON files loaded.")
 
 
 
 
 
 
 
 
 
64
 
65
- import os
66
- OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
67
- if not OPENAI_API_KEY:
68
- st.warning("You must set your OPENAI_API_KEY for chat.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
- else:
71
- llm = ChatOpenAI(model_name="gpt-4.1", openai_api_key=OPENAI_API_KEY)
72
- tools = [
73
- Tool(
74
- name="json_search",
75
- func=json_search_tool,
76
- description="Find any value (name, product, number, etc) across all loaded JSON files. Input is what the user wants to find (e.g. 'iphone', 'apps installed', or 'alice')."
77
- )
78
- ]
79
- agent = initialize_agent(
80
- tools=tools,
81
- llm=llm,
82
- agent="chat-conversational-react-description",
83
- verbose=False
84
  )
 
85
 
86
- for msg in st.session_state.chat_history:
87
- if msg["role"] == "user":
88
- st.markdown(f"<div style='color: #4F8BF9;'><b>User:</b> {msg['content']}</div>", unsafe_allow_html=True)
89
- else:
90
- st.markdown(f"<div style='color: #1C6E4C;'><b>Agent:</b> {msg['content']}</div>", unsafe_allow_html=True)
 
 
 
 
 
91
 
92
- def send_chat():
 
 
 
 
 
 
 
93
  user_input = st.session_state.temp_input
94
  if user_input.strip():
95
- st.session_state.chat_history.append({"role": "user", "content": user_input})
96
- agent_reply = agent.run(user_input)
97
- st.session_state.chat_history.append({"role": "assistant", "content": agent_reply})
98
- st.session_state.temp_input = ""
99
-
100
- if st.session_state.json_data:
101
- st.text_input("Your message:", key="temp_input", on_change=send_chat)
102
- else:
103
- st.info("Please upload at least one JSON file to start chatting.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import difflib
import html
import json
import os
import traceback

import requests
import streamlit as st
7
+
8
# --- SESSION STATE ---
# json_data: {file name -> parsed JSON}; messages: chat history (user /
# assistant / function entries); files_loaded: guards re-parsing on reruns;
# loaded_names: names of the files currently parsed (detects selection changes);
# temp_input: backing value for the chat text box.
if "json_data" not in st.session_state:
    st.session_state.json_data = {}
if "messages" not in st.session_state:
    st.session_state.messages = []
if "files_loaded" not in st.session_state:
    st.session_state.files_loaded = False
if "loaded_names" not in st.session_state:
    st.session_state.loaded_names = set()
if "temp_input" not in st.session_state:
    st.session_state.temp_input = ""

st.set_page_config(page_title="Chat with Your JSONs", layout="wide")
st.title("Chat with Your JSON Files (OpenAI function-calling, No LangChain)")

# --- UPLOAD FILES ---
uploaded_files = st.sidebar.file_uploader(
    "Choose one or more JSON files", type="json", accept_multiple_files=True
)
if uploaded_files:
    # BUG FIX: the original only checked `files_loaded`, so adding or removing
    # files after the first load was silently ignored until *all* files were
    # removed; reload whenever the selected file names change.
    current_names = {f.name for f in uploaded_files}
    if not st.session_state.files_loaded or current_names != st.session_state.loaded_names:
        st.session_state.json_data.clear()
        for f in uploaded_files:
            try:
                content = json.load(f)
                st.session_state.json_data[f.name] = content
                st.sidebar.success(f"Loaded: {f.name}")
            except Exception as e:
                st.sidebar.error(f"Error reading {f.name}: {e}")
        st.session_state.files_loaded = True
        st.session_state.loaded_names = current_names
        # Start a fresh conversation whenever the data set changes.
        st.session_state.messages = []
else:
    st.session_state.json_data.clear()
    st.session_state.files_loaded = False
    st.session_state.loaded_names = set()
39
 
40
# --- NORMALIZE ---
def normalize(s):
    """Canonicalize *s* for fuzzy comparison: lower-case, treat underscores,
    hyphens and dots as spaces, and collapse runs of whitespace."""
    text = str(s).lower()
    for sep in ("_", "-", "."):
        text = text.replace(sep, " ")
    return " ".join(text.split())
43
 
44
  def is_fuzzy_match(a, b, threshold=0.7):
45
+ ratio = difflib.SequenceMatcher(None, a, b).ratio()
 
46
  return ratio >= threshold or a in b or b in a
47
 
48
# --- FLEXIBLE SEARCH (ALL VALUES, ALL TYPES) ---
def search_all_jsons(key, value, data_files=None):
    """Recursively find records where a key named *key* holds a primitive value
    fuzzy-matching *value*.

    data_files: optional {file_name: parsed_json} mapping; defaults to the
    uploaded files in st.session_state.  The parameter is optional so the
    OpenAI function-call dispatch (which passes only key/value) keeps working,
    while direct callers/tests can supply data without Streamlit.
    Returns a list of {"file", "key", "value", "record"} dicts; "record" is the
    enclosing dict object from the parsed JSON.
    """
    if data_files is None:
        data_files = st.session_state.json_data
    matches = []
    value_norm = normalize(value)
    key_norm = normalize(key)  # hoisted: original re-normalized the key for every dict entry
    for file_name, data in data_files.items():
        def recursive_search(obj):
            if isinstance(obj, dict):
                for k, v in obj.items():
                    if normalize(k) == key_norm:
                        # Only primitive values can match; lists/dicts under a
                        # matching key are still descended into below.
                        if isinstance(v, (str, int, float, bool)) and is_fuzzy_match(value_norm, normalize(v)):
                            matches.append({
                                "file": file_name,
                                "key": k,
                                "value": v,
                                "record": obj
                            })
                    recursive_search(v)
            elif isinstance(obj, list):
                for item in obj:
                    recursive_search(item)
        recursive_search(data)
    return matches
70
def fuzzy_value_search(value, data_files=None):
    """Find every primitive field, under any key, whose value fuzzy-matches
    *value* across the uploaded JSON files.

    data_files: optional {file_name: parsed_json} mapping; defaults to the
    uploaded files in st.session_state (kept optional for backward
    compatibility with the OpenAI function-call dispatch).
    Returns a list of {"file", "key", "value", "record"} dicts.
    """
    if data_files is None:
        data_files = st.session_state.json_data
    matches = []
    value_norm = normalize(value)
    for file_name, data in data_files.items():
        def recursive_search(obj):
            if isinstance(obj, dict):
                for k, v in obj.items():
                    if isinstance(v, (str, int, float, bool)) and is_fuzzy_match(value_norm, normalize(v)):
                        matches.append({
                            "file": file_name,
                            "key": k,
                            "value": v,
                            "record": obj
                        })
                    recursive_search(v)
            elif isinstance(obj, list):
                for item in obj:
                    recursive_search(item)
        recursive_search(data)
    return matches
91
 
def list_keys(file_name, data=None):
    """List the top-level keys of an uploaded JSON file.

    For a list of records, the first record's keys serve as a sample; other
    shapes yield [].  data: optional pre-parsed JSON that bypasses
    st.session_state (kept optional so the OpenAI dispatch can keep calling
    with file_name only).  Errors are returned as {"error": ...} rather than
    raised, so the LLM can inspect them.
    """
    try:
        if data is None:
            data = st.session_state.json_data[file_name]
        if isinstance(data, dict):
            return list(data.keys())
        elif isinstance(data, list) and data and isinstance(data[0], dict):
            return list(data[0].keys())
        else:
            return []
    except Exception as e:
        return {"error": str(e)}
 
 
 
 
 
 
 
 
103
 
def count_key_occurrences(file_name, key, data=None):
    """Count how many times *key* (compared via normalize) appears as a dict
    key anywhere in a file, at any nesting depth.

    data: optional pre-parsed JSON that bypasses st.session_state, kept
    optional for backward compatibility with the OpenAI dispatch.
    Errors are returned as {"error": ...}, mirroring list_keys.
    """
    try:
        if data is None:
            data = st.session_state.json_data[file_name]
        key_norm = normalize(key)  # hoisted out of the recursion
        count = 0
        def recursive(obj):
            nonlocal count
            if isinstance(obj, dict):
                for k, v in obj.items():
                    if normalize(k) == key_norm:
                        count += 1
                    recursive(v)
            elif isinstance(obj, list):
                for item in obj:
                    recursive(item)
        recursive(data)
        return count
    except Exception as e:
        return {"error": str(e)}
122
 
# --- FUNCTION SCHEMA for OpenAI ---
# Tool declarations sent with every chat-completions request; each entry maps
# 1:1 onto a local Python function dispatched in send_message().  Per-property
# "description" fields help the model fill arguments correctly.
function_schema = [
    {
        "name": "search_all_jsons",
        "description": "Recursively search all uploaded JSONs for all records where a key matches a value (fuzzy, any type).",
        "parameters": {
            "type": "object",
            "properties": {
                "key": {"type": "string", "description": "JSON key to match (case/punctuation-insensitive)."},
                "value": {"type": "string", "description": "Value to fuzzy-match under that key."}
            },
            "required": ["key", "value"]
        }
    },
    {
        "name": "fuzzy_value_search",
        "description": "Search all uploaded JSONs for any record with a field value matching (fuzzy, all types).",
        "parameters": {
            "type": "object",
            "properties": {
                "value": {"type": "string", "description": "Value to fuzzy-match against any field."}
            },
            "required": ["value"]
        }
    },
    {
        "name": "list_keys",
        "description": "List top-level keys in a given JSON file.",
        "parameters": {
            "type": "object",
            "properties": {
                "file_name": {"type": "string", "description": "Exact name of an uploaded JSON file."}
            },
            "required": ["file_name"]
        }
    },
    {
        "name": "count_key_occurrences",
        "description": "Count number of times a key appears in a file.",
        "parameters": {
            "type": "object",
            "properties": {
                "file_name": {"type": "string", "description": "Exact name of an uploaded JSON file."},
                "key": {"type": "string", "description": "Key to count (case/punctuation-insensitive)."}
            },
            "required": ["file_name", "key"]
        }
    }
]
172
 
173
# --- SYSTEM PROMPT: Set expectations for OpenAI! ---
# Injected as the first message of every chat-completions request (see
# send_message); steers the model toward the declared functions above instead
# of free-form guessing, and names the routing rule between the two search tools.
system_message = {
    "role": "system",
    "content": (
        "You are a JSON data assistant. Use the functions provided to answer the user's question. "
        "If the user's query does not mention a key, use 'fuzzy_value_search' to match on any value. "
        "If a key is mentioned (like 'apps_installed'), use 'search_all_jsons' for that key and the value. "
        "You may use 'list_keys' to help discover the file structure if needed. "
        "Always give a direct answer from the data if possible."
    )
}
184
 
# --- CHAT UI (with OpenAI function-calling!) ---
st.markdown("### Ask any question about your data, just like ChatGPT.")

# SECURITY FIX: message content (user-typed text, LLM output, tool output) was
# interpolated into unsafe_allow_html markdown unescaped, allowing HTML/script
# injection into the page; escape it before rendering.
for msg in st.session_state.messages:
    safe = html.escape(str(msg["content"]))
    if msg["role"] == "user":
        st.markdown(f"<div style='color: #4F8BF9;'><b>User:</b> {safe}</div>", unsafe_allow_html=True)
    elif msg["role"] == "assistant":
        st.markdown(f"<div style='color: #1C6E4C;'><b>Agent:</b> {safe}</div>", unsafe_allow_html=True)
    elif msg["role"] == "function":
        st.markdown(f"<details><summary><b>Function '{html.escape(msg['name'])}' output:</b></summary><pre>{safe}</pre></details>", unsafe_allow_html=True)
195
 
196
# OpenAI REST credentials: the key is read from the environment, never
# hard-coded; HEADERS is reused by every requests.post in send_message.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
HEADERS = {
    "Authorization": "Bearer " + str(OPENAI_API_KEY),
    "Content-Type": "application/json",
}
201
+
202
def send_message():
    """Streamlit on_change callback for the chat box.

    Sends the conversation to the OpenAI chat-completions endpoint with the
    declared function schema; if the model requests a function call, runs the
    matching local search helper and issues a follow-up request so the model
    can phrase a final answer from the tool output.  Failures are surfaced in
    the UI rather than raised.
    """
    try:
        user_input = st.session_state.temp_input
        if user_input.strip():
            st.session_state.messages.append({"role": "user", "content": user_input})
            # Bound the context: system prompt plus the last 10 history entries.
            # NOTE(review): replayed history can contain "function" messages
            # without their assistant `function_call` partner — confirm the API
            # tolerates that, or filter them here.
            chat_messages = [system_message] + st.session_state.messages[-10:]
            chat_resp = requests.post(
                "https://api.openai.com/v1/chat/completions",
                headers=HEADERS,
                json={
                    "model": "gpt-4.1",
                    "messages": chat_messages,
                    "functions": function_schema,
                    "function_call": "auto",
                    "temperature": 0,
                    "max_tokens": 1200,
                },
                timeout=60,
            )
            chat_resp.raise_for_status()
            msg = chat_resp.json()["choices"][0]["message"]

            if msg.get("function_call"):
                func_name = msg["function_call"]["name"]
                args = json.loads(msg["function_call"]["arguments"])

                # Dispatch to the matching local tool.
                if func_name == "search_all_jsons":
                    result = search_all_jsons(args.get("key"), args.get("value"))
                elif func_name == "fuzzy_value_search":
                    result = fuzzy_value_search(args.get("value"))
                elif func_name == "list_keys":
                    result = list_keys(args.get("file_name"))
                elif func_name == "count_key_occurrences":
                    result = count_key_occurrences(args.get("file_name"), args.get("key"))
                else:
                    result = {"error": f"Unknown function: {func_name}"}

                result_json = json.dumps(result, indent=2)  # serialize once, reuse twice
                st.session_state.messages.append({
                    "role": "function",
                    "name": func_name,
                    "content": result_json,
                })

                # BUG FIX: the OpenAI API requires the assistant message that
                # carries `function_call` to precede the matching "function"
                # result message; the original omitted `msg`, making the
                # follow-up request invalid.
                followup_messages = chat_messages + [
                    msg,
                    {"role": "function", "name": func_name, "content": result_json},
                ]
                final_resp = requests.post(
                    "https://api.openai.com/v1/chat/completions",
                    headers=HEADERS,
                    json={
                        "model": "gpt-4.1",
                        "messages": followup_messages,
                        "temperature": 0,
                        "max_tokens": 1200,
                    },
                    timeout=60,
                )
                final_resp.raise_for_status()
                answer = final_resp.json()["choices"][0]["message"]["content"]
                st.session_state.messages.append({"role": "assistant", "content": answer})
            else:
                st.session_state.messages.append({"role": "assistant", "content": msg["content"]})
            st.session_state.temp_input = ""
    except Exception as e:
        # Surface network/HTTP/JSON errors in the page instead of crashing the rerun.
        st.error("Exception: " + str(e))
        st.code(traceback.format_exc())
273
+
# Only enable the chat box once at least one JSON file has been parsed.
if not st.session_state.json_data:
    st.info("Please upload at least one JSON file to start chatting.")
else:
    st.text_input("Your message:", key="temp_input", on_change=send_message)