Multi_Model_AI_AGENT_VectorDB_langchain_json

Sleeping

App Files Community

Seth0330 committed on Jun 12, 2025

Commit

9a5dc6b

verified ·

1 Parent(s): f5c02d4

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -14

app.py CHANGED Viewed

@@ -22,21 +22,15 @@ if "ingested_batches" not in st.session_state:
     st.session_state.ingested_batches = 0
 if "messages" not in st.session_state:
     st.session_state.messages = []
-if "modal_open" not in st.session_state:
-    st.session_state.modal_open = False
-if "modal_content" not in st.session_state:
-    st.session_state.modal_content = ""
-if "modal_title" not in st.session_state:
-    st.session_state.modal_title = ""
-st.set_page_config(page_title="Chat with Your JSON Vectors", layout="wide")
-st.title("Chat with Your Vectorized JSON Files (Hybrid Retrieval, SQLite, LLM)")
 uploaded_files = st.file_uploader(
     "Upload JSON files in batches (any structure)", type="json", accept_multiple_files=True
 )
-# --- Enhanced Flattening: extract names from emails/user fields for LLM context
 def flatten_json_obj(obj, parent_key="", sep="."):
     items = {}
     if isinstance(obj, dict):
@@ -62,6 +56,25 @@ def flatten_json_obj(obj, parent_key="", sep="."):
         items[parent_key] = obj
     return items
 def get_embedding(text):
     client = openai.OpenAI(api_key=OPENAI_API_KEY)
     response = client.embeddings.create(input=[text], model=EMBEDDING_MODEL)
@@ -88,6 +101,7 @@ def ingest_json_files(files):
     rows = []
     batch_time = datetime.datetime.utcnow().isoformat()
     for file in files:
         raw = json.load(file)
         source_name = file.name
         if isinstance(raw, list):
@@ -199,7 +213,6 @@ def filter_records_by_entity(records, entity):
             matches.append(doc)
     return matches if matches else records
 def hybrid_query(user_query, top_k=5):
     vector_docs = query_vector_db(user_query, top_k=top_k)
     fuzzy_docs = python_fuzzy_match(user_query, top_k=top_k)
@@ -211,11 +224,13 @@ def hybrid_query(user_query, top_k=5):
             all_docs.append(doc)
             seen_ids.add(doc_id)
     entity = extract_main_entity(user_query)
     entity_docs = filter_records_by_entity(all_docs, entity) if entity else all_docs
-    # Show only the most relevant record (to make LLM's job easier)
     if entity_docs:
         doc = entity_docs[0]
-        # Optionally, highlight entity in context
         if entity:
             doc.page_content = re.sub(rf"({re.escape(entity)})", r"**\1**", doc.page_content, flags=re.IGNORECASE)
         st.markdown("#### Context shown to LLM")
@@ -229,7 +244,6 @@ class HybridRetriever(BaseRetriever):
     def _get_relevant_documents(self, query, run_manager=None, **kwargs):
         return hybrid_query(query, self.top_k)
-# --- Prompt (explicitly tells LLM what to do)
 system_prompt = (
     "You are a JSON data assistant. "
     "If the question mentions a name or email (e.g. Johnny), match it to any field value (even as part of an email) "
@@ -244,7 +258,6 @@ prompt = ChatPromptTemplate.from_messages([
 ])
 llm = ChatOpenAI(model="gpt-4.1", openai_api_key=OPENAI_API_KEY, temperature=0)
 retriever = HybridRetriever(top_k=5)
 qa_chain = RetrievalQA.from_chain_type(
     llm=llm,

     st.session_state.ingested_batches = 0
 if "messages" not in st.session_state:
     st.session_state.messages = []
+st.set_page_config(page_title="Chat with Your JSON Vectors (DEBUG)", layout="wide")
+st.title("Chat with Your Vectorized JSON Files (Hybrid Retrieval, SQLite, LLM, DEBUG)")
 uploaded_files = st.file_uploader(
     "Upload JSON files in batches (any structure)", type="json", accept_multiple_files=True
 )
+# --- Enhanced flattening with name extraction for emails/user fields
 def flatten_json_obj(obj, parent_key="", sep="."):
     items = {}
     if isinstance(obj, dict):
         items[parent_key] = obj
     return items
+# --- DEBUG: Show flattening of uploaded JSONs
+if uploaded_files:
+    st.markdown("#### DEBUG: Flat view of all uploaded JSON records")
+    for file in uploaded_files:
+        file.seek(0)
+        try:
+            raw = json.load(file)
+            if isinstance(raw, list):
+                records = raw
+            elif isinstance(raw, dict):
+                main_lists = [v for v in raw.values() if isinstance(v, list)]
+                records = main_lists[0] if main_lists else [raw]
+            else:
+                records = [raw]
+            for idx, rec in enumerate(records):
+                st.code(flatten_json_obj(rec))
+        except Exception as e:
+            st.warning(str(e))
 def get_embedding(text):
     client = openai.OpenAI(api_key=OPENAI_API_KEY)
     response = client.embeddings.create(input=[text], model=EMBEDDING_MODEL)
     rows = []
     batch_time = datetime.datetime.utcnow().isoformat()
     for file in files:
+        file.seek(0)
         raw = json.load(file)
         source_name = file.name
         if isinstance(raw, list):
             matches.append(doc)
     return matches if matches else records
 def hybrid_query(user_query, top_k=5):
     vector_docs = query_vector_db(user_query, top_k=top_k)
     fuzzy_docs = python_fuzzy_match(user_query, top_k=top_k)
             all_docs.append(doc)
             seen_ids.add(doc_id)
     entity = extract_main_entity(user_query)
+    st.markdown(f"#### DEBUG: Extracted entity from question: {entity}")
+    st.markdown("#### DEBUG: All retrieved docs for your query")
+    for idx, doc in enumerate(all_docs):
+        st.code(doc.page_content)
     entity_docs = filter_records_by_entity(all_docs, entity) if entity else all_docs
     if entity_docs:
         doc = entity_docs[0]
         if entity:
             doc.page_content = re.sub(rf"({re.escape(entity)})", r"**\1**", doc.page_content, flags=re.IGNORECASE)
         st.markdown("#### Context shown to LLM")
     def _get_relevant_documents(self, query, run_manager=None, **kwargs):
         return hybrid_query(query, self.top_k)
 system_prompt = (
     "You are a JSON data assistant. "
     "If the question mentions a name or email (e.g. Johnny), match it to any field value (even as part of an email) "
 ])
 llm = ChatOpenAI(model="gpt-4.1", openai_api_key=OPENAI_API_KEY, temperature=0)
 retriever = HybridRetriever(top_k=5)
 qa_chain = RetrievalQA.from_chain_type(
     llm=llm,