Multi_Model_AI_AGENT_VectorDB_langchain_json

Sleeping

App Files Files Community

Seth0330 committed on Jun 12, 2025

Commit

50ba7ba

verified ·

1 Parent(s): 65ce454

Update app.py

Browse files

Files changed (1) hide show

app.py +41 -55

app.py CHANGED Viewed

@@ -3,54 +3,41 @@ import json
 import difflib
 import re
-# --- Flexible/fuzzy search utilities ---
-COMMON_NAME_KEYS = ["user", "username", "name", "fullName", "firstName", "lastName", "customer"]
-LOGIN_KEYS = ["lastLogin", "login", "loggedIn", "lastLoggedIn", "last_login", "last_logged_in"]
 def normalize(s):
-    # Lowercase, replace separators with spaces, strip, collapse whitespace
     return ' '.join(str(s).lower().replace("_", " ").replace("-", " ").replace(".", " ").split())
 def is_fuzzy_match(a, b, threshold=0.7):
-    # Both a and b should be normalized strings
     ratio = difflib.SequenceMatcher(None, a, b).ratio()
     return ratio >= threshold or a in b or b in a
-# --- Fuzzy, nested user search ---
-def recursive_find_user(target_name):
     matches = []
-    norm_target = normalize(target_name)
     for file_name, data in st.session_state.json_data.items():
         def _search(obj, path):
             if isinstance(obj, dict):
                 for k, v in obj.items():
-                    # Direct match for string value (fuzzy/partial)
-                    if k in COMMON_NAME_KEYS and isinstance(v, str) and is_fuzzy_match(norm_target, normalize(v)):
-                        login_info = {}
-                        for lk in LOGIN_KEYS:
-                            if lk in obj:
-                                login_info[lk] = obj[lk]
                         matches.append({
                             "match_path": path + [k],
-                            "matched_name": v,
                             "record": obj,
-                            "file": file_name,
-                            "login_info": login_info
                         })
-                    # Nested dict (fuzzy/partial)
-                    if k in COMMON_NAME_KEYS and isinstance(v, dict):
                         for nk, nv in v.items():
-                            if nk in COMMON_NAME_KEYS and is_fuzzy_match(norm_target, normalize(nv)):
-                                login_info = {}
-                                for lk in LOGIN_KEYS:
-                                    if lk in obj:
-                                        login_info[lk] = obj[lk]
                                 matches.append({
                                     "match_path": path + [k, nk],
-                                    "matched_name": nv,
-                                    "record": obj,
-                                    "file": file_name,
-                                    "login_info": login_info
                                 })
                     _search(v, path + [k])
             elif isinstance(obj, list):
@@ -59,20 +46,19 @@ def recursive_find_user(target_name):
         _search(data, [])
     return matches
-# --- Show all user keys/values (for debug) ---
-def show_all_users():
     found = []
     for file_name, data in st.session_state.json_data.items():
         def recursive(obj, path):
             if isinstance(obj, dict):
                 for k, v in obj.items():
-                    if k in COMMON_NAME_KEYS:
-                        if isinstance(v, str):
-                            found.append(f"{file_name} | {'.'.join(path + [k])} = {v}")
-                        elif isinstance(v, dict):
-                            for nk, nv in v.items():
-                                if nk in COMMON_NAME_KEYS:
-                                    found.append(f"{file_name} | {'.'.join(path + [k, nk])} = {nv}")
                     recursive(v, path + [k])
             elif isinstance(obj, list):
                 for idx, item in enumerate(obj):
@@ -80,38 +66,38 @@ def show_all_users():
         recursive(data, [])
     return found
-# --- User query handler ---
 def handle_user_query(query):
     patterns = [
         r"(?:last\s*login.*?for|when\s+did)\s+([a-zA-Z0-9 _\-\.@]+)",
         r"when\s+was\s+([a-zA-Z0-9 _\-\.@]+)\s+last\s+(?:login|logged\s*in)",
         r"last\s*login\s*of\s+([a-zA-Z0-9 _\-\.@]+)",
         r"(?:info|details|record) for\s+([a-zA-Z0-9 _\-\.@]+)"
     ]
-    found_name = None
     for pat in patterns:
         m = re.search(pat, query, re.IGNORECASE)
         if m:
-            found_name = m.group(1).strip()
             break
-    if not found_name:
-        # Fallback: look for any word with at least 3 letters (handles very short names too)
         m = re.search(r"([A-Za-z0-9][A-Za-z0-9 _\-\.@]{2,})", query)
         if m:
-            found_name = m.group(1).strip()
-    if found_name:
-        results = recursive_find_user(found_name)
         if not results:
-            return f"No records found for '{found_name}' in any file."
         answers = []
         for res in results:
-            login = ", ".join([f"{k}: {v}" for k, v in res["login_info"].items()]) if res["login_info"] else "No login info found"
             answers.append(
-                f"**{res['matched_name']}** (in file `{res['file']}`) — {login}"
             )
         return "\n\n".join(answers)
     else:
-        return "Sorry, I can only answer direct user info queries (e.g., 'When did Bob the Builder last login?')."
 # --- Streamlit UI setup ---
 if "json_data" not in st.session_state:
@@ -123,8 +109,8 @@ if "temp_input" not in st.session_state:
 if "files_loaded" not in st.session_state:
     st.session_state.files_loaded = False
-st.set_page_config(page_title="Flexible JSON User Q&A", layout="wide")
-st.title("Instant JSON-Backed AI Q&A (Fuzzy Flexible Search!)")
 uploaded_files = st.sidebar.file_uploader(
     "Choose one or more JSON files", type="json", accept_multiple_files=True
@@ -144,7 +130,7 @@ elif not uploaded_files:
     st.session_state.json_data.clear()
     st.session_state.files_loaded = False
-st.markdown("### Ask about any user (partial/typo/fuzzy OK) — try: `bob`, `bob the builder`, `astrofan`, `alice`, `johnny.appleseed`")
 for msg in st.session_state.messages:
     if msg["role"] == "user":
         st.markdown(f"<div style='color: #4F8BF9;'><b>User:</b> {msg['content']}</div>", unsafe_allow_html=True)
@@ -161,7 +147,7 @@ def send_message():
 if st.session_state.json_data:
     st.text_input("Your message:", key="temp_input", on_change=send_message)
-    if st.button("Show all users in uploaded JSONs"):
-        st.write(show_all_users())
 else:
     st.info("Please upload at least one JSON file to start chatting.")

 import difflib
 import re
+# --- Fuzzy search utilities ---
 def normalize(s):
     """Return ``s`` as a lowercase, single-spaced string for fuzzy comparison.

     The value is converted with ``str()``, lowercased, has every underscore,
     hyphen and dot turned into a space, and then has leading, trailing and
     repeated whitespace collapsed to single spaces.
     """
     # One translation table replaces all three separator characters in a single pass.
     separators_to_spaces = str.maketrans({"_": " ", "-": " ", ".": " "})
     lowered = str(s).lower()
     words = lowered.translate(separators_to_spaces).split()
     return " ".join(words)
 def is_fuzzy_match(a, b, threshold=0.7):
     """Decide whether two strings are similar enough to count as a match.

     Args:
         a: First string to compare.
         b: Second string to compare.
         threshold: Minimum ``difflib.SequenceMatcher`` ratio that counts
             as a match on similarity alone.

     Returns:
         True when one string contains the other or when their similarity
         ratio reaches ``threshold``; False otherwise. An empty string only
         matches another empty string.
     """
     # "" is a substring of every string, so without this guard an empty
     # value would match every non-empty query (and vice versa).
     if not a or not b:
         return a == b
     # Cheap containment check first; it short-circuits the ratio computation.
     if a in b or b in a:
         return True
     return difflib.SequenceMatcher(None, a, b).ratio() >= threshold
+# --- Fuzzy, global string value search (all keys/fields) ---
+def recursive_fuzzy_value_search(target_value):
     matches = []
+    norm_target = normalize(target_value)
     for file_name, data in st.session_state.json_data.items():
         def _search(obj, path):
             if isinstance(obj, dict):
                 for k, v in obj.items():
+                    # Match ANY string value (not just specific keys)
+                    if isinstance(v, str) and is_fuzzy_match(norm_target, normalize(v)):
                         matches.append({
                             "match_path": path + [k],
+                            "matched_value": v,
+                            "key": k,
                             "record": obj,
+                            "file": file_name
                         })
+                    # Check inside nested dicts
+                    if isinstance(v, dict):
                         for nk, nv in v.items():
+                            if isinstance(nv, str) and is_fuzzy_match(norm_target, normalize(nv)):
                                 matches.append({
                                     "match_path": path + [k, nk],
+                                    "matched_value": nv,
+                                    "key": nk,
+                                    "record": v,
+                                    "file": file_name
                                 })
                     _search(v, path + [k])
             elif isinstance(obj, list):
         _search(data, [])
     return matches
+# --- Show all string values (for debug) ---
+def show_all_strings():
     found = []
     for file_name, data in st.session_state.json_data.items():
         def recursive(obj, path):
             if isinstance(obj, dict):
                 for k, v in obj.items():
+                    if isinstance(v, str):
+                        found.append(f"{file_name} | {'.'.join(path + [k])} = {v}")
+                    elif isinstance(v, dict):
+                        for nk, nv in v.items():
+                            if isinstance(nv, str):
+                                found.append(f"{file_name} | {'.'.join(path + [k, nk])} = {nv}")
                     recursive(v, path + [k])
             elif isinstance(obj, list):
                 for idx, item in enumerate(obj):
         recursive(data, [])
     return found
+# --- Query handler (searches for any string value) ---
 def handle_user_query(query):
+    # Extract likely search string (more flexible, still supports old user-centric queries)
     patterns = [
         r"(?:last\s*login.*?for|when\s+did)\s+([a-zA-Z0-9 _\-\.@]+)",
         r"when\s+was\s+([a-zA-Z0-9 _\-\.@]+)\s+last\s+(?:login|logged\s*in)",
         r"last\s*login\s*of\s+([a-zA-Z0-9 _\-\.@]+)",
         r"(?:info|details|record) for\s+([a-zA-Z0-9 _\-\.@]+)"
     ]
+    found_value = None
     for pat in patterns:
         m = re.search(pat, query, re.IGNORECASE)
         if m:
+            found_value = m.group(1).strip()
             break
+    if not found_value:
+        # Fallback: any word/phrase of 3+ chars (letters, digits, spaces, dashes, underscores, dots)
         m = re.search(r"([A-Za-z0-9][A-Za-z0-9 _\-\.@]{2,})", query)
         if m:
+            found_value = m.group(1).strip()
+    if found_value:
+        results = recursive_fuzzy_value_search(found_value)
         if not results:
+            return f"No records found for '{found_value}' in any file."
         answers = []
         for res in results:
             answers.append(
+                f"**{res['matched_value']}** (in file `{res['file']}` | key: `{res['key']}` | path: `{'.'.join(res['match_path'])}`)"
             )
         return "\n\n".join(answers)
     else:
+        return "No valid search value detected. Try a person's name, product, device, etc."
 # --- Streamlit UI setup ---
 if "json_data" not in st.session_state:
 if "files_loaded" not in st.session_state:
     st.session_state.files_loaded = False
+st.set_page_config(page_title="Flexible JSON Fuzzy Search", layout="wide")
+st.title("Instant JSON-Backed Q&A (Flexible Fuzzy Search — All Keys!)")
 uploaded_files = st.sidebar.file_uploader(
     "Choose one or more JSON files", type="json", accept_multiple_files=True
     st.session_state.json_data.clear()
     st.session_state.files_loaded = False
+st.markdown("### Ask about ANY value (name, product, device, etc) — partials/typos/substring OK!")
 for msg in st.session_state.messages:
     if msg["role"] == "user":
         st.markdown(f"<div style='color: #4F8BF9;'><b>User:</b> {msg['content']}</div>", unsafe_allow_html=True)
 if st.session_state.json_data:
     st.text_input("Your message:", key="temp_input", on_change=send_message)
+    if st.button("Show all strings in uploaded JSONs"):
+        st.write(show_all_strings())
 else:
     st.info("Please upload at least one JSON file to start chatting.")