Multi_Model_AI_AGENT_VectorDB_langchain_json

Sleeping

App Files Files Community

Seth0330 committed on Jun 12, 2025

Commit

aa866f2

verified ·

1 Parent(s): f73d69f

Update app.py

Browse files

Files changed (1) hide show

app.py +90 -9

app.py CHANGED Viewed

@@ -5,7 +5,10 @@ import requests
 import traceback
 import difflib
-# --- SESSION STATE ---
 if "json_data" not in st.session_state:
     st.session_state.json_data = {}
 if "messages" not in st.session_state:
@@ -18,7 +21,7 @@ if "temp_input" not in st.session_state:
 st.set_page_config(page_title="Chat with Your JSONs", layout="wide")
 st.title("Chat with Your JSON Files (OpenAI function-calling, No LangChain)")
-# --- UPLOAD FILES ---
 uploaded_files = st.sidebar.file_uploader(
     "Choose one or more JSON files", type="json", accept_multiple_files=True
 )
@@ -37,7 +40,7 @@ elif not uploaded_files:
     st.session_state.json_data.clear()
     st.session_state.files_loaded = False
-# --- NORMALIZE & FUZZY ---
 def normalize(s):
     return ' '.join(str(s).lower().replace("_", " ").replace("-", " ").replace(".", " ").split())
@@ -45,7 +48,7 @@ def is_fuzzy_match(a, b, threshold=0.7):
     ratio = difflib.SequenceMatcher(None, a, b).ratio()
     return ratio >= threshold or a in b or b in a
-# --- FLEXIBLE SEARCH (ALL VALUES, ALL TYPES) ---
 def search_all_jsons(key, value):
     matches = []
     value_norm = normalize(value)
@@ -68,6 +71,7 @@ def search_all_jsons(key, value):
         recursive_search(data)
     return matches
 def fuzzy_value_search(value):
     matches = []
     value_norm = normalize(value)
@@ -89,6 +93,7 @@ def fuzzy_value_search(value):
         recursive_search(data)
     return matches
 def list_keys(file_name):
     try:
         data = st.session_state.json_data[file_name]
@@ -101,6 +106,7 @@ def list_keys(file_name):
     except Exception as e:
         return {"error": str(e)}
 def count_key_occurrences(file_name, key):
     try:
         data = st.session_state.json_data[file_name]
@@ -120,9 +126,8 @@ def count_key_occurrences(file_name, key):
     except Exception as e:
         return {"error": str(e)}
-# --- NEW: FIND/COUNT IN ARRAYS ---
 def find_in_arrays(key, value, return_count=True):
-    # Searches ALL arrays for dicts where key == value
     matches = []
     count = 0
     for file_name, data in st.session_state.json_data.items():
@@ -146,7 +151,54 @@ def find_in_arrays(key, value, return_count=True):
         recursive(data)
     return count if return_count else matches
-# --- FUNCTION SCHEMA for OpenAI ---
 function_schema = [
     {
         "name": "search_all_jsons",
@@ -206,15 +258,37 @@ function_schema = [
             },
             "required": ["key", "value"]
         }
     }
 ]
-# --- SYSTEM PROMPT ---
 system_message = {
     "role": "system",
     "content": (
         "You are a JSON data assistant. Use the functions provided to answer the user's question. "
         "If the user asks for the number or details of items in a list/array (e.g., completed tasks), use 'find_in_arrays'. "
         "If the user's query does not mention a key, use 'fuzzy_value_search' to match on any value. "
         "If a key is mentioned (like 'apps_installed'), use 'search_all_jsons' for that key and the value. "
         "You may use 'list_keys' to help discover the file structure if needed. "
@@ -222,7 +296,7 @@ system_message = {
     )
 }
-# --- CHAT UI (with OpenAI function-calling!) ---
 st.markdown("### Ask any question about your data, just like ChatGPT.")
 for msg in st.session_state.messages:
@@ -282,6 +356,13 @@ def send_message():
                         args.get("value"),
                         args.get("return_count", True)
                     )
                 else:
                     result = {"error": f"Unknown function: {func_name}"}

 import traceback
 import difflib
+# ---- BASIC NAME GUESS FOR FEMALE NAMES (expand as needed)
+COMMON_FEMALE_NAMES = {"alice", "mary", "lisa", "jane", "emily", "sophia", "emma", "olivia", "ava", "mia", "isabella", "charlotte", "amelia", "harper", "abigail"}
+# ---- SESSION STATE ----
 if "json_data" not in st.session_state:
     st.session_state.json_data = {}
 if "messages" not in st.session_state:
 st.set_page_config(page_title="Chat with Your JSONs", layout="wide")
 st.title("Chat with Your JSON Files (OpenAI function-calling, No LangChain)")
+# ---- UPLOAD FILES ----
 uploaded_files = st.sidebar.file_uploader(
     "Choose one or more JSON files", type="json", accept_multiple_files=True
 )
     st.session_state.json_data.clear()
     st.session_state.files_loaded = False
+# ---- UTILS ----
 def normalize(s):
     return ' '.join(str(s).lower().replace("_", " ").replace("-", " ").replace(".", " ").split())
     ratio = difflib.SequenceMatcher(None, a, b).ratio()
     return ratio >= threshold or a in b or b in a
+# ---- SEARCH ALL KEYS FOR KEY/VALUE MATCH ----
 def search_all_jsons(key, value):
     matches = []
     value_norm = normalize(value)
         recursive_search(data)
     return matches
+# ---- FUZZY VALUE SEARCH (returns WHOLE RECORD) ----
 def fuzzy_value_search(value):
     matches = []
     value_norm = normalize(value)
         recursive_search(data)
     return matches
+# ---- LIST KEYS ----
 def list_keys(file_name):
     try:
         data = st.session_state.json_data[file_name]
     except Exception as e:
         return {"error": str(e)}
+# ---- COUNT KEY OCCURRENCES ----
 def count_key_occurrences(file_name, key):
     try:
         data = st.session_state.json_data[file_name]
     except Exception as e:
         return {"error": str(e)}
+# ---- FIND/COUNT IN ARRAYS (e.g., COMPLETED TASKS) ----
 def find_in_arrays(key, value, return_count=True):
     matches = []
     count = 0
     for file_name, data in st.session_state.json_data.items():
         recursive(data)
     return count if return_count else matches
+# ---- SUM FIELD BY NAME (e.g., TOTAL AMOUNT FOR JOHNNY) ----
+def sum_field_by_name(name, field="amount"):
+    total = 0
+    details = []
+    name_norm = normalize(name)
+    for file_name, data in st.session_state.json_data.items():
+        def recursive(obj):
+            nonlocal total
+            if isinstance(obj, dict):
+                for k, v in obj.items():
+                    if isinstance(v, (str, int, float, bool)) and is_fuzzy_match(name_norm, normalize(v)):
+                        # Look for field in this or sibling dict
+                        if field in obj:
+                            try:
+                                amt = float(obj[field])
+                                total += amt
+                                details.append({"file": file_name, "name_match": v, "amount": amt, "record": obj})
+                            except Exception:
+                                pass
+                    recursive(v)
+            elif isinstance(obj, list):
+                for item in obj:
+                    recursive(item)
+        recursive(data)
+    return {"total": total, "matches": details}
+# ---- COUNT FEMALE NAMES (guess from common names) ----
+def count_female_names():
+    count = 0
+    names = []
+    for file_name, data in st.session_state.json_data.items():
+        def recursive(obj):
+            nonlocal count
+            if isinstance(obj, dict):
+                for k, v in obj.items():
+                    if k.lower() in {"name", "fullName", "firstName"}:
+                        first_name = str(v).split()[0].lower()
+                        if first_name in COMMON_FEMALE_NAMES:
+                            count += 1
+                            names.append({"file": file_name, "name": v, "record": obj})
+                    recursive(v)
+            elif isinstance(obj, list):
+                for item in obj:
+                    recursive(item)
+        recursive(data)
+    return {"count": count, "names": names}
+# ---- FUNCTION SCHEMA for OpenAI ----
 function_schema = [
     {
         "name": "search_all_jsons",
             },
             "required": ["key", "value"]
         }
+    },
+    {
+        "name": "sum_field_by_name",
+        "description": "Sum a field (e.g. amount) for any record containing a name/email/identifier. Returns total and breakdown.",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "name": {"type": "string", "description": "Name or identifier to match"},
+                "field": {"type": "string", "description": "The numeric field to sum, e.g. 'amount'"},
+            },
+            "required": ["name", "field"]
+        }
+    },
+    {
+        "name": "count_female_names",
+        "description": "Count the number of common female names based on a preset list.",
+        "parameters": {
+            "type": "object",
+            "properties": {},
+        }
     }
 ]
+# ---- SYSTEM PROMPT ----
 system_message = {
     "role": "system",
     "content": (
         "You are a JSON data assistant. Use the functions provided to answer the user's question. "
         "If the user asks for the number or details of items in a list/array (e.g., completed tasks), use 'find_in_arrays'. "
+        "If the user asks about the sum/total of a field for a name or identifier, use 'sum_field_by_name'. "
+        "If the user asks about female names, use 'count_female_names'. "
         "If the user's query does not mention a key, use 'fuzzy_value_search' to match on any value. "
         "If a key is mentioned (like 'apps_installed'), use 'search_all_jsons' for that key and the value. "
         "You may use 'list_keys' to help discover the file structure if needed. "
     )
 }
+# ---- CHAT UI (with OpenAI function-calling!) ----
 st.markdown("### Ask any question about your data, just like ChatGPT.")
 for msg in st.session_state.messages:
                         args.get("value"),
                         args.get("return_count", True)
                     )
+                elif func_name == "sum_field_by_name":
+                    result = sum_field_by_name(
+                        args.get("name"),
+                        args.get("field", "amount")
+                    )
+                elif func_name == "count_female_names":
+                    result = count_female_names()
                 else:
                     result = {"error": f"Unknown function: {func_name}"}