Seth0330 committed on
Commit
4cba20f
·
verified ·
1 Parent(s): 5d4ff05

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -38
app.py CHANGED
@@ -22,21 +22,25 @@ if "ingested_batches" not in st.session_state:
22
  st.session_state.ingested_batches = 0
23
  if "messages" not in st.session_state:
24
  st.session_state.messages = []
 
 
 
 
 
 
25
 
26
- st.set_page_config(page_title="Chat with Your JSON Vectors (Hybrid, Enhanced)", layout="wide")
27
- st.title("Chat with Your Vectorized JSON Files (Hybrid Retrieval, SQLite, LLM)")
28
 
29
  uploaded_files = st.file_uploader(
30
  "Upload JSON files in batches (any structure)", type="json", accept_multiple_files=True
31
  )
32
 
33
- # --- Enhanced flattening (never loses parent fields)
34
  def flatten_json_obj(obj, parent_key="", sep="."):
35
  items = {}
36
  if isinstance(obj, dict):
37
  for k, v in obj.items():
38
  new_key = f"{parent_key}{sep}{k}" if parent_key else k
39
- # If this is a customer/email field, extract name!
40
  if (
41
  k.lower() in {"customer", "user", "email", "username"} and
42
  isinstance(v, str) and "@" in v
@@ -56,20 +60,6 @@ def flatten_json_obj(obj, parent_key="", sep="."):
56
  items[parent_key] = obj
57
  return items
58
 
59
- # --- DEBUG: Show flattening of uploaded JSONs
60
- if uploaded_files:
61
- st.markdown("#### DEBUG: Flat view of all uploaded JSON records")
62
- for file in uploaded_files:
63
- file.seek(0)
64
- try:
65
- raw = json.load(file)
66
- # NEW: Don't try to pull lists out of dicts; treat the whole dict as a record
67
- records = raw if isinstance(raw, list) else [raw]
68
- for idx, rec in enumerate(records):
69
- st.code(flatten_json_obj(rec))
70
- except Exception as e:
71
- st.warning(str(e))
72
-
73
  def get_embedding(text):
74
  client = openai.OpenAI(api_key=OPENAI_API_KEY)
75
  response = client.embeddings.create(input=[text], model=EMBEDDING_MODEL)
@@ -99,7 +89,6 @@ def ingest_json_files(files):
99
  file.seek(0)
100
  raw = json.load(file)
101
  source_name = file.name
102
- # NEW: Always treat the whole dict as a record, even if it contains lists
103
  records = raw if isinstance(raw, list) else [raw]
104
  for rec in records:
105
  flat = flatten_json_obj(rec)
@@ -214,17 +203,9 @@ def hybrid_query(user_query, top_k=5):
214
  all_docs.append(doc)
215
  seen_ids.add(doc_id)
216
  entity = extract_main_entity(user_query)
217
- st.markdown(f"#### DEBUG: Extracted entity from question: {entity}")
218
- st.markdown("#### DEBUG: All retrieved docs for your query")
219
- for idx, doc in enumerate(all_docs):
220
- st.code(doc.page_content)
221
  entity_docs = filter_records_by_entity(all_docs, entity) if entity else all_docs
222
  if entity_docs:
223
  doc = entity_docs[0]
224
- if entity:
225
- doc.page_content = re.sub(rf"({re.escape(entity)})", r"**\1**", doc.page_content, flags=re.IGNORECASE)
226
- st.markdown("#### Context shown to LLM")
227
- st.code(doc.page_content)
228
  return [doc]
229
  else:
230
  return all_docs[:1]
@@ -257,13 +238,33 @@ qa_chain = RetrievalQA.from_chain_type(
257
  )
258
 
259
  st.markdown("### Ask any question about your data, just like ChatGPT.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
260
  for msg in st.session_state.messages:
261
  if msg["role"] == "user":
262
  st.markdown(f"<div style='color: #4F8BF9;'><b>User:</b> {msg['content']}</div>", unsafe_allow_html=True)
263
  elif msg["role"] == "assistant":
264
  st.markdown(f"<div style='color: #1C6E4C;'><b>Agent:</b> {msg['content']}</div>", unsafe_allow_html=True)
265
- elif msg["role"] == "function":
266
- st.markdown(f"<details><summary><b>Function Output:</b></summary><pre>{msg['content']}</pre></details>", unsafe_allow_html=True)
267
 
268
  def send_message():
269
  user_input = st.session_state.temp_input.strip()
@@ -275,20 +276,24 @@ def send_message():
275
  answer = result['result']
276
  st.session_state.messages.append({"role": "assistant", "content": answer})
277
  docs = result['source_documents']
278
- doc_list = []
279
- for doc in docs:
280
- doc_list.append({
281
- "file": doc.metadata["source_file"],
282
- "id": doc.metadata["id"],
283
- "similarity": doc.metadata["similarity"],
284
- "record": json.loads(doc.metadata["raw_json"])
285
- })
286
- st.session_state.messages.append({"role": "function", "content": json.dumps(doc_list, indent=2)})
 
287
  st.session_state.temp_input = ""
288
 
289
  st.text_input("Your message:", key="temp_input", on_change=send_message)
290
 
291
  if st.button("Clear chat"):
292
  st.session_state.messages = []
 
 
 
293
 
294
  st.info(f"Batches ingested so far (this session): {st.session_state.ingested_batches}")
 
22
  st.session_state.ingested_batches = 0
23
  if "messages" not in st.session_state:
24
  st.session_state.messages = []
25
+ if "json_links" not in st.session_state:
26
+ st.session_state.json_links = []
27
+ if "json_link_details" not in st.session_state:
28
+ st.session_state.json_link_details = {}
29
+ if "expanded_json" not in st.session_state:
30
+ st.session_state.expanded_json = set()
31
 
32
+ st.set_page_config(page_title="Chat with Your JSON Vectors (Hybrid, Clean)", layout="wide")
33
+ st.title("Chat with Your Vectorized JSON Files")
34
 
35
  uploaded_files = st.file_uploader(
36
  "Upload JSON files in batches (any structure)", type="json", accept_multiple_files=True
37
  )
38
 
 
39
  def flatten_json_obj(obj, parent_key="", sep="."):
40
  items = {}
41
  if isinstance(obj, dict):
42
  for k, v in obj.items():
43
  new_key = f"{parent_key}{sep}{k}" if parent_key else k
 
44
  if (
45
  k.lower() in {"customer", "user", "email", "username"} and
46
  isinstance(v, str) and "@" in v
 
60
  items[parent_key] = obj
61
  return items
62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  def get_embedding(text):
64
  client = openai.OpenAI(api_key=OPENAI_API_KEY)
65
  response = client.embeddings.create(input=[text], model=EMBEDDING_MODEL)
 
89
  file.seek(0)
90
  raw = json.load(file)
91
  source_name = file.name
 
92
  records = raw if isinstance(raw, list) else [raw]
93
  for rec in records:
94
  flat = flatten_json_obj(rec)
 
203
  all_docs.append(doc)
204
  seen_ids.add(doc_id)
205
  entity = extract_main_entity(user_query)
 
 
 
 
206
  entity_docs = filter_records_by_entity(all_docs, entity) if entity else all_docs
207
  if entity_docs:
208
  doc = entity_docs[0]
 
 
 
 
209
  return [doc]
210
  else:
211
  return all_docs[:1]
 
238
  )
239
 
240
  st.markdown("### Ask any question about your data, just like ChatGPT.")
241
+
242
+ def show_tiny_json_links():
243
+ # Only show for the last assistant answer if there are matching JSONs
244
+ if not st.session_state.json_links:
245
+ return
246
+ st.write("")
247
+ for idx, link_key in enumerate(st.session_state.json_links):
248
+ label = st.session_state.json_link_details[link_key]['label']
249
+ rec = st.session_state.json_link_details[link_key]['record']
250
+ unique_id = f"{link_key}_{idx}"
251
+ link_text = f"<a href='javascript:void(0);' style='font-size: 11px; color: #555; text-decoration: underline;' onclick=\"document.getElementById('{unique_id}').style.display = (document.getElementById('{unique_id}').style.display === 'none' ? 'block' : 'none')\">[view JSON]</a> <span style='font-size: 10px; color: #999'>{label}</span>"
252
+ st.markdown(link_text, unsafe_allow_html=True)
253
+ if unique_id not in st.session_state.expanded_json:
254
+ st.session_state.expanded_json.remove(unique_id) if unique_id in st.session_state.expanded_json else None
255
+ if st.session_state.get("show_" + unique_id, False):
256
+ st.code(json.dumps(rec, indent=2), language="json", key=unique_id)
257
+ else:
258
+ st.markdown(f"<div id='{unique_id}' style='display:none'>{json.dumps(rec, indent=2)}</div>", unsafe_allow_html=True)
259
+ st.session_state.json_links = []
260
+ st.session_state.json_link_details = {}
261
+
262
  for msg in st.session_state.messages:
263
  if msg["role"] == "user":
264
  st.markdown(f"<div style='color: #4F8BF9;'><b>User:</b> {msg['content']}</div>", unsafe_allow_html=True)
265
  elif msg["role"] == "assistant":
266
  st.markdown(f"<div style='color: #1C6E4C;'><b>Agent:</b> {msg['content']}</div>", unsafe_allow_html=True)
267
+ show_tiny_json_links()
 
268
 
269
  def send_message():
270
  user_input = st.session_state.temp_input.strip()
 
276
  answer = result['result']
277
  st.session_state.messages.append({"role": "assistant", "content": answer})
278
  docs = result['source_documents']
279
+ link_keys = []
280
+ link_details = {}
281
+ for idx, doc in enumerate(docs):
282
+ link_key = f"json_{doc.metadata['id']}_{idx}"
283
+ rec = json.loads(doc.metadata["raw_json"])
284
+ label = f"{doc.metadata['source_file']} | Similarity: {doc.metadata['similarity']}"
285
+ link_details[link_key] = {"label": label, "record": rec}
286
+ link_keys.append(link_key)
287
+ st.session_state.json_links = link_keys
288
+ st.session_state.json_link_details = link_details
289
  st.session_state.temp_input = ""
290
 
291
  st.text_input("Your message:", key="temp_input", on_change=send_message)
292
 
293
  if st.button("Clear chat"):
294
  st.session_state.messages = []
295
+ st.session_state.json_links = []
296
+ st.session_state.json_link_details = {}
297
+ st.session_state.expanded_json = set()
298
 
299
  st.info(f"Batches ingested so far (this session): {st.session_state.ingested_batches}")