Seth0330 committed on
Commit
aa866f2
·
verified ·
1 Parent(s): f73d69f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +90 -9
app.py CHANGED
@@ -5,7 +5,10 @@ import requests
5
  import traceback
6
  import difflib
7
 
8
- # --- SESSION STATE ---
 
 
 
9
  if "json_data" not in st.session_state:
10
  st.session_state.json_data = {}
11
  if "messages" not in st.session_state:
@@ -18,7 +21,7 @@ if "temp_input" not in st.session_state:
18
  st.set_page_config(page_title="Chat with Your JSONs", layout="wide")
19
  st.title("Chat with Your JSON Files (OpenAI function-calling, No LangChain)")
20
 
21
- # --- UPLOAD FILES ---
22
  uploaded_files = st.sidebar.file_uploader(
23
  "Choose one or more JSON files", type="json", accept_multiple_files=True
24
  )
@@ -37,7 +40,7 @@ elif not uploaded_files:
37
  st.session_state.json_data.clear()
38
  st.session_state.files_loaded = False
39
 
40
- # --- NORMALIZE & FUZZY ---
41
  def normalize(s):
42
  return ' '.join(str(s).lower().replace("_", " ").replace("-", " ").replace(".", " ").split())
43
 
@@ -45,7 +48,7 @@ def is_fuzzy_match(a, b, threshold=0.7):
45
  ratio = difflib.SequenceMatcher(None, a, b).ratio()
46
  return ratio >= threshold or a in b or b in a
47
 
48
- # --- FLEXIBLE SEARCH (ALL VALUES, ALL TYPES) ---
49
  def search_all_jsons(key, value):
50
  matches = []
51
  value_norm = normalize(value)
@@ -68,6 +71,7 @@ def search_all_jsons(key, value):
68
  recursive_search(data)
69
  return matches
70
 
 
71
  def fuzzy_value_search(value):
72
  matches = []
73
  value_norm = normalize(value)
@@ -89,6 +93,7 @@ def fuzzy_value_search(value):
89
  recursive_search(data)
90
  return matches
91
 
 
92
  def list_keys(file_name):
93
  try:
94
  data = st.session_state.json_data[file_name]
@@ -101,6 +106,7 @@ def list_keys(file_name):
101
  except Exception as e:
102
  return {"error": str(e)}
103
 
 
104
  def count_key_occurrences(file_name, key):
105
  try:
106
  data = st.session_state.json_data[file_name]
@@ -120,9 +126,8 @@ def count_key_occurrences(file_name, key):
120
  except Exception as e:
121
  return {"error": str(e)}
122
 
123
- # --- NEW: FIND/COUNT IN ARRAYS ---
124
  def find_in_arrays(key, value, return_count=True):
125
- # Searches ALL arrays for dicts where key == value
126
  matches = []
127
  count = 0
128
  for file_name, data in st.session_state.json_data.items():
@@ -146,7 +151,54 @@ def find_in_arrays(key, value, return_count=True):
146
  recursive(data)
147
  return count if return_count else matches
148
 
149
- # --- FUNCTION SCHEMA for OpenAI ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  function_schema = [
151
  {
152
  "name": "search_all_jsons",
@@ -206,15 +258,37 @@ function_schema = [
206
  },
207
  "required": ["key", "value"]
208
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209
  }
210
  ]
211
 
212
- # --- SYSTEM PROMPT ---
213
  system_message = {
214
  "role": "system",
215
  "content": (
216
  "You are a JSON data assistant. Use the functions provided to answer the user's question. "
217
  "If the user asks for the number or details of items in a list/array (e.g., completed tasks), use 'find_in_arrays'. "
 
 
218
  "If the user's query does not mention a key, use 'fuzzy_value_search' to match on any value. "
219
  "If a key is mentioned (like 'apps_installed'), use 'search_all_jsons' for that key and the value. "
220
  "You may use 'list_keys' to help discover the file structure if needed. "
@@ -222,7 +296,7 @@ system_message = {
222
  )
223
  }
224
 
225
- # --- CHAT UI (with OpenAI function-calling!) ---
226
  st.markdown("### Ask any question about your data, just like ChatGPT.")
227
 
228
  for msg in st.session_state.messages:
@@ -282,6 +356,13 @@ def send_message():
282
  args.get("value"),
283
  args.get("return_count", True)
284
  )
 
 
 
 
 
 
 
285
  else:
286
  result = {"error": f"Unknown function: {func_name}"}
287
 
 
5
  import traceback
6
  import difflib
7
 
8
+ # ---- BASIC NAME GUESS FOR FEMALE NAMES (expand as needed)
9
+ COMMON_FEMALE_NAMES = {"alice", "mary", "lisa", "jane", "emily", "sophia", "emma", "olivia", "ava", "mia", "isabella", "charlotte", "amelia", "harper", "abigail"}
10
+
11
+ # ---- SESSION STATE ----
12
  if "json_data" not in st.session_state:
13
  st.session_state.json_data = {}
14
  if "messages" not in st.session_state:
 
21
  st.set_page_config(page_title="Chat with Your JSONs", layout="wide")
22
  st.title("Chat with Your JSON Files (OpenAI function-calling, No LangChain)")
23
 
24
+ # ---- UPLOAD FILES ----
25
  uploaded_files = st.sidebar.file_uploader(
26
  "Choose one or more JSON files", type="json", accept_multiple_files=True
27
  )
 
40
  st.session_state.json_data.clear()
41
  st.session_state.files_loaded = False
42
 
43
+ # ---- UTILS ----
44
  def normalize(s):
45
  return ' '.join(str(s).lower().replace("_", " ").replace("-", " ").replace(".", " ").split())
46
 
 
48
  ratio = difflib.SequenceMatcher(None, a, b).ratio()
49
  return ratio >= threshold or a in b or b in a
50
 
51
+ # ---- SEARCH ALL KEYS FOR KEY/VALUE MATCH ----
52
  def search_all_jsons(key, value):
53
  matches = []
54
  value_norm = normalize(value)
 
71
  recursive_search(data)
72
  return matches
73
 
74
+ # ---- FUZZY VALUE SEARCH (returns WHOLE RECORD) ----
75
  def fuzzy_value_search(value):
76
  matches = []
77
  value_norm = normalize(value)
 
93
  recursive_search(data)
94
  return matches
95
 
96
+ # ---- LIST KEYS ----
97
  def list_keys(file_name):
98
  try:
99
  data = st.session_state.json_data[file_name]
 
106
  except Exception as e:
107
  return {"error": str(e)}
108
 
109
+ # ---- COUNT KEY OCCURRENCES ----
110
  def count_key_occurrences(file_name, key):
111
  try:
112
  data = st.session_state.json_data[file_name]
 
126
  except Exception as e:
127
  return {"error": str(e)}
128
 
129
+ # ---- FIND/COUNT IN ARRAYS (e.g., COMPLETED TASKS) ----
130
  def find_in_arrays(key, value, return_count=True):
 
131
  matches = []
132
  count = 0
133
  for file_name, data in st.session_state.json_data.items():
 
151
  recursive(data)
152
  return count if return_count else matches
153
 
154
+ # ---- SUM FIELD BY NAME (e.g., TOTAL AMOUNT FOR JOHNNY) ----
155
def sum_field_by_name(name, field="amount"):
    """Sum ``field`` over every record that mentions ``name``.

    Walks all loaded JSON documents in ``st.session_state.json_data``. A
    record (dict) is selected when it contains ``field`` and at least one of
    its scalar values fuzzy-matches ``name`` after normalisation.

    Args:
        name: Name or identifier to look for. An empty or ``None`` name
            selects nothing.
        field: Key whose numeric value is summed for each selected record.

    Returns:
        A dict ``{"total": <sum>, "matches": [...]}``. Each match holds the
        source ``file``, the value that matched (``name_match``), the numeric
        ``amount`` that was added, and the full ``record``.
    """
    total = 0
    details = []

    # An empty needle is a substring of everything, so it would select every
    # record; None would be stringified to "none" and searched for.
    name_norm = normalize(name) if name is not None else ""
    if not name_norm:
        return {"total": total, "matches": details}

    def first_matching_value(record):
        # Return (True, value) for the first scalar value that matches the name.
        for value in record.values():
            if not isinstance(value, (str, int, float, bool)):
                continue
            value_norm = normalize(value)
            # Skip empty values: is_fuzzy_match accepts substring containment,
            # and "" is contained in every string.
            if value_norm and is_fuzzy_match(name_norm, value_norm):
                return True, value
        return False, None

    def walk(obj, file_name):
        nonlocal total
        if isinstance(obj, dict):
            if field in obj:
                found, matched_value = first_matching_value(obj)
                if found:
                    # Add the field once per record, even when several values
                    # (e.g. name and email) match the same person.
                    try:
                        amt = float(obj[field])
                    except (TypeError, ValueError):
                        # Non-numeric field value: leave this record out.
                        pass
                    else:
                        total += amt
                        details.append({
                            "file": file_name,
                            "name_match": matched_value,
                            "amount": amt,
                            "record": obj,
                        })
            for child in obj.values():
                walk(child, file_name)
        elif isinstance(obj, list):
            for item in obj:
                walk(item, file_name)

    for file_name, data in st.session_state.json_data.items():
        walk(data, file_name)
    return {"total": total, "matches": details}
179
+
180
+ # ---- COUNT FEMALE NAMES (guess from common names) ----
181
def count_female_names():
    """Count records whose name field starts with a common female first name.

    Walks all loaded JSON documents in ``st.session_state.json_data`` and
    checks the first word of each name-like string value against
    ``COMMON_FEMALE_NAMES``. This is a guess from a preset list, not a
    reliable gender determination.

    Returns:
        A dict ``{"count": <int>, "names": [...]}``. Each entry in ``names``
        holds the source ``file``, the matched ``name`` value and the full
        ``record``.
    """
    # Keys are compared after lowercasing and removing separators, so the
    # entries here must be lowercase: "fullName", "first_name" and "Name"
    # all reduce to one of these.
    name_keys = {"name", "fullname", "firstname"}
    names = []

    def is_name_key(key):
        reduced = str(key).lower()
        for sep in ("_", "-", " "):
            reduced = reduced.replace(sep, "")
        return reduced in name_keys

    def walk(obj, file_name):
        if isinstance(obj, dict):
            counted = False
            for k, v in obj.items():
                # Count a record at most once, even if it carries both a
                # "firstName" and a "fullName" for the same person.
                if not counted and isinstance(v, str) and is_name_key(k):
                    words = v.split()
                    # Empty or whitespace-only names have no first word.
                    if words and words[0].lower() in COMMON_FEMALE_NAMES:
                        names.append({"file": file_name, "name": v, "record": obj})
                        counted = True
                walk(v, file_name)
        elif isinstance(obj, list):
            for item in obj:
                walk(item, file_name)

    for file_name, data in st.session_state.json_data.items():
        walk(data, file_name)
    return {"count": len(names), "names": names}
200
+
201
+ # ---- FUNCTION SCHEMA for OpenAI ----
202
  function_schema = [
203
  {
204
  "name": "search_all_jsons",
 
258
  },
259
  "required": ["key", "value"]
260
  }
261
+ },
262
+ {
263
+ "name": "sum_field_by_name",
264
+ "description": "Sum a field (e.g. amount) for any record containing a name/email/identifier. Returns total and breakdown.",
265
+ "parameters": {
266
+ "type": "object",
267
+ "properties": {
268
+ "name": {"type": "string", "description": "Name or identifier to match"},
269
+ "field": {"type": "string", "description": "The numeric field to sum, e.g. 'amount'"},
270
+ },
271
+ "required": ["name", "field"]
272
+ }
273
+ },
274
+ {
275
+ "name": "count_female_names",
276
+ "description": "Count the number of common female names based on a preset list.",
277
+ "parameters": {
278
+ "type": "object",
279
+ "properties": {},
280
+ }
281
  }
282
  ]
283
 
284
+ # ---- SYSTEM PROMPT ----
285
  system_message = {
286
  "role": "system",
287
  "content": (
288
  "You are a JSON data assistant. Use the functions provided to answer the user's question. "
289
  "If the user asks for the number or details of items in a list/array (e.g., completed tasks), use 'find_in_arrays'. "
290
+ "If the user asks about the sum/total of a field for a name or identifier, use 'sum_field_by_name'. "
291
+ "If the user asks about female names, use 'count_female_names'. "
292
  "If the user's query does not mention a key, use 'fuzzy_value_search' to match on any value. "
293
  "If a key is mentioned (like 'apps_installed'), use 'search_all_jsons' for that key and the value. "
294
  "You may use 'list_keys' to help discover the file structure if needed. "
 
296
  )
297
  }
298
 
299
+ # ---- CHAT UI (with OpenAI function-calling!) ----
300
  st.markdown("### Ask any question about your data, just like ChatGPT.")
301
 
302
  for msg in st.session_state.messages:
 
356
  args.get("value"),
357
  args.get("return_count", True)
358
  )
359
+ elif func_name == "sum_field_by_name":
360
+ result = sum_field_by_name(
361
+ args.get("name"),
362
+ args.get("field", "amount")
363
+ )
364
+ elif func_name == "count_female_names":
365
+ result = count_female_names()
366
  else:
367
  result = {"error": f"Unknown function: {func_name}"}
368