Update app.py
app.py (CHANGED)
@@ -22,21 +22,25 @@ if "ingested_batches" not in st.session_state:
     st.session_state.ingested_batches = 0
 if "messages" not in st.session_state:
     st.session_state.messages = []
+if "json_links" not in st.session_state:
+    st.session_state.json_links = []
+if "json_link_details" not in st.session_state:
+    st.session_state.json_link_details = {}
+if "expanded_json" not in st.session_state:
+    st.session_state.expanded_json = set()
 
-st.set_page_config(page_title="Chat with Your JSON Vectors (Hybrid,
-st.title("Chat with Your Vectorized JSON Files
+st.set_page_config(page_title="Chat with Your JSON Vectors (Hybrid, Clean)", layout="wide")
+st.title("Chat with Your Vectorized JSON Files")
 
 uploaded_files = st.file_uploader(
     "Upload JSON files in batches (any structure)", type="json", accept_multiple_files=True
 )
 
-# --- Enhanced flattening (never loses parent fields)
 def flatten_json_obj(obj, parent_key="", sep="."):
     items = {}
     if isinstance(obj, dict):
         for k, v in obj.items():
             new_key = f"{parent_key}{sep}{k}" if parent_key else k
-            # If this is a customer/email field, extract name!
             if (
                 k.lower() in {"customer", "user", "email", "username"} and
                 isinstance(v, str) and "@" in v
@@ -56,20 +60,6 @@ def flatten_json_obj(obj, parent_key="", sep="."):
         items[parent_key] = obj
     return items
 
-# --- DEBUG: Show flattening of uploaded JSONs
-if uploaded_files:
-    st.markdown("#### DEBUG: Flat view of all uploaded JSON records")
-    for file in uploaded_files:
-        file.seek(0)
-        try:
-            raw = json.load(file)
-            # NEW: Don't try to pull lists out of dicts; treat the whole dict as a record
-            records = raw if isinstance(raw, list) else [raw]
-            for idx, rec in enumerate(records):
-                st.code(flatten_json_obj(rec))
-        except Exception as e:
-            st.warning(str(e))
-
 def get_embedding(text):
     client = openai.OpenAI(api_key=OPENAI_API_KEY)
     response = client.embeddings.create(input=[text], model=EMBEDDING_MODEL)
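The two hunks above only show the top and bottom of flatten_json_obj; the middle of the function falls outside the diff context. For orientation, a helper with this signature and these visible branches usually looks something like the sketch below. The list handling and the email-to-name extraction in the middle are assumptions inferred from the deleted comment, not the committed code.

def flatten_json_obj(obj, parent_key="", sep="."):
    # Recursively flatten nested dicts/lists into {"a.b.0.c": value} pairs,
    # keeping every parent field on the record.
    items = {}
    if isinstance(obj, dict):
        for k, v in obj.items():
            new_key = f"{parent_key}{sep}{k}" if parent_key else k
            # Visible in the hunk above: email-like fields get special handling.
            if (
                k.lower() in {"customer", "user", "email", "username"}
                and isinstance(v, str) and "@" in v
            ):
                # Assumed: keep the raw value and also store the local part as a name.
                items[new_key] = v
                items[f"{new_key}{sep}name"] = v.split("@")[0]
            elif isinstance(v, (dict, list)):
                items.update(flatten_json_obj(v, new_key, sep))
            else:
                items[new_key] = v
    elif isinstance(obj, list):
        for i, v in enumerate(obj):
            child_key = f"{parent_key}{sep}{i}" if parent_key else str(i)
            items.update(flatten_json_obj(v, child_key, sep))
    else:
        # Visible in the hunk above: a bare scalar falls through to its parent key.
        items[parent_key] = obj
    return items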
@@ -99,7 +89,6 @@ def ingest_json_files(files):
         file.seek(0)
         raw = json.load(file)
         source_name = file.name
-        # NEW: Always treat the whole dict as a record, even if it contains lists
         records = raw if isinstance(raw, list) else [raw]
         for rec in records:
             flat = flatten_json_obj(rec)
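The comment deleted here described behaviour that is unchanged on the following line: a top-level JSON list is split into one record per element, while anything else, including a dict that itself contains lists, is ingested as a single record. A quick illustration with made-up payloads:

payload = {"order_id": 7, "items": [{"sku": "A1"}, {"sku": "B2"}]}
records = payload if isinstance(payload, list) else [payload]
# -> one record; the nested "items" list stays inside it

payload = [{"order_id": 7}, {"order_id": 8}]
records = payload if isinstance(payload, list) else [payload]
# -> two records, one per list element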
@@ -214,17 +203,9 @@ def hybrid_query(user_query, top_k=5):
             all_docs.append(doc)
             seen_ids.add(doc_id)
     entity = extract_main_entity(user_query)
-    st.markdown(f"#### DEBUG: Extracted entity from question: {entity}")
-    st.markdown("#### DEBUG: All retrieved docs for your query")
-    for idx, doc in enumerate(all_docs):
-        st.code(doc.page_content)
     entity_docs = filter_records_by_entity(all_docs, entity) if entity else all_docs
     if entity_docs:
         doc = entity_docs[0]
-        if entity:
-            doc.page_content = re.sub(rf"({re.escape(entity)})", r"**\1**", doc.page_content, flags=re.IGNORECASE)
-        st.markdown("#### Context shown to LLM")
-        st.code(doc.page_content)
         return [doc]
     else:
         return all_docs[:1]
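For reference, the lines deleted from hybrid_query bolded every occurrence of the extracted entity in the selected document before echoing it to the page, roughly like this (the entity and content strings are invented):

import re

entity = "jane.doe@example.com"
content = "customer: Jane.Doe@example.com | total: 19.99"
highlighted = re.sub(rf"({re.escape(entity)})", r"**\1**", content, flags=re.IGNORECASE)
# -> "customer: **Jane.Doe@example.com** | total: 19.99"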
@@ -257,13 +238,33 @@ qa_chain = RetrievalQA.from_chain_type(
 )
 
 st.markdown("### Ask any question about your data, just like ChatGPT.")
+
+def show_tiny_json_links():
+    # Only show for the last assistant answer if there are matching JSONs
+    if not st.session_state.json_links:
+        return
+    st.write("")
+    for idx, link_key in enumerate(st.session_state.json_links):
+        label = st.session_state.json_link_details[link_key]['label']
+        rec = st.session_state.json_link_details[link_key]['record']
+        unique_id = f"{link_key}_{idx}"
+        link_text = f"<a href='javascript:void(0);' style='font-size: 11px; color: #555; text-decoration: underline;' onclick=\"document.getElementById('{unique_id}').style.display = (document.getElementById('{unique_id}').style.display === 'none' ? 'block' : 'none')\">[view JSON]</a> <span style='font-size: 10px; color: #999'>{label}</span>"
+        st.markdown(link_text, unsafe_allow_html=True)
+        if unique_id not in st.session_state.expanded_json:
+            st.session_state.expanded_json.remove(unique_id) if unique_id in st.session_state.expanded_json else None
+        if st.session_state.get("show_" + unique_id, False):
+            st.code(json.dumps(rec, indent=2), language="json", key=unique_id)
+        else:
+            st.markdown(f"<div id='{unique_id}' style='display:none'>{json.dumps(rec, indent=2)}</div>", unsafe_allow_html=True)
+    st.session_state.json_links = []
+    st.session_state.json_link_details = {}
+
 for msg in st.session_state.messages:
     if msg["role"] == "user":
         st.markdown(f"<div style='color: #4F8BF9;'><b>User:</b> {msg['content']}</div>", unsafe_allow_html=True)
     elif msg["role"] == "assistant":
         st.markdown(f"<div style='color: #1C6E4C;'><b>Agent:</b> {msg['content']}</div>", unsafe_allow_html=True)
-
-        st.markdown(f"<details><summary><b>Function Output:</b></summary><pre>{msg['content']}</pre></details>", unsafe_allow_html=True)
+        show_tiny_json_links()
 
 def send_message():
     user_input = st.session_state.temp_input.strip()
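show_tiny_json_links() only reads what send_message() stores into session state in the next hunk. A hypothetical example of that shape, with an invented file name, record, and similarity score:

# Hypothetical session-state contents after one answer (values are made up):
st.session_state.json_links = ["json_42_0"]
st.session_state.json_link_details = {
    "json_42_0": {
        "label": "orders_batch1.json | Similarity: 0.8731",
        "record": {"order_id": 42, "customer": "jane.doe@example.com", "total": 19.99},
    },
}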
@@ -275,20 +276,24 @@ def send_message():
     answer = result['result']
     st.session_state.messages.append({"role": "assistant", "content": answer})
     docs = result['source_documents']
-
-
-
-
-
-
-
-
-    st.session_state.
+    link_keys = []
+    link_details = {}
+    for idx, doc in enumerate(docs):
+        link_key = f"json_{doc.metadata['id']}_{idx}"
+        rec = json.loads(doc.metadata["raw_json"])
+        label = f"{doc.metadata['source_file']} | Similarity: {doc.metadata['similarity']}"
+        link_details[link_key] = {"label": label, "record": rec}
+        link_keys.append(link_key)
+    st.session_state.json_links = link_keys
+    st.session_state.json_link_details = link_details
     st.session_state.temp_input = ""
 
 st.text_input("Your message:", key="temp_input", on_change=send_message)
 
 if st.button("Clear chat"):
     st.session_state.messages = []
+    st.session_state.json_links = []
+    st.session_state.json_link_details = {}
+    st.session_state.expanded_json = set()
 
 st.info(f"Batches ingested so far (this session): {st.session_state.ingested_batches}")
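The link-building loop in send_message() assumes each retrieved document carries id, raw_json, source_file, and similarity in its metadata, i.e. that ingestion stores something along these lines (illustrative values only; the Document import path depends on the LangChain version in use):

import json
from langchain.schema import Document  # path may differ by LangChain version

record = {"order_id": 42, "customer": "jane.doe@example.com", "total": 19.99}
doc = Document(
    page_content="order_id: 42 | customer: jane.doe@example.com | total: 19.99",
    metadata={
        "id": "orders_batch1-0",           # used to build the link key
        "raw_json": json.dumps(record),    # parsed back with json.loads() above
        "source_file": "orders_batch1.json",
        "similarity": 0.8731,
    },
)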