Multi_Model_AI_AGENT_VectorDB_langchain_json

Sleeping

App Files Files Community

Seth0330 committed on Jun 12, 2025

Commit

46c8eb0

verified ·

1 Parent(s): 657f503

Update app.py

Browse files

Files changed (1) hide show

app.py +249 -75

app.py CHANGED Viewed

@@ -1,103 +1,277 @@
 import streamlit as st
 import json
-from langchain.chat_models import ChatOpenAI
-from langchain.agents import initialize_agent, Tool
 def normalize(s):
     return ' '.join(str(s).lower().replace("_", " ").replace("-", " ").replace(".", " ").split())
 def is_fuzzy_match(a, b, threshold=0.7):
-    from difflib import SequenceMatcher
-    ratio = SequenceMatcher(None, a, b).ratio()
     return ratio >= threshold or a in b or b in a
-def recursive_fuzzy_value_search(target_value):
     matches = []
-    norm_target = normalize(target_value)
     for file_name, data in st.session_state.json_data.items():
-        def _search(obj, path):
             if isinstance(obj, dict):
                 for k, v in obj.items():
-                    if isinstance(v, (str, int, float, bool)) and is_fuzzy_match(norm_target, normalize(v)):
                         matches.append({
                             "file": file_name,
                             "key": k,
-                            "path": ".".join(path + [k]),
-                            "value": v
                         })
-                    _search(v, path + [k])
             elif isinstance(obj, list):
-                for idx, item in enumerate(obj):
-                    _search(item, path + [f"[{idx}]"])
-        _search(data, [])
     return matches
-# LangChain Tool for LLM
-def json_search_tool(query: str) -> str:
-    """Search all uploaded JSON files for any value (fuzzy match); returns matching fields and values."""
-    results = recursive_fuzzy_value_search(query)
-    if not results:
-        return f"No match for '{query}'."
-    answer = []
-    for res in results:
-        answer.append(f"{res['file']} | {res['key']} ({res['path']}): {res['value']}")
-    return "\n".join(answer)
-# Streamlit UI
-if "json_data" not in st.session_state:
-    st.session_state.json_data = {}
-if "chat_history" not in st.session_state:
-    st.session_state.chat_history = []
-st.set_page_config(page_title="Chat with Your JSONs!", layout="wide")
-st.title("Chat with Your JSON Files (powered by GPT + instant JSON search)")
-uploaded_files = st.sidebar.file_uploader(
-    "Choose one or more JSON files", type="json", accept_multiple_files=True
-)
-if uploaded_files:
-    st.session_state.json_data.clear()
-    for f in uploaded_files:
-        content = json.load(f)
-        st.session_state.json_data[f.name] = content
-    st.sidebar.success("All JSON files loaded.")
-import os
-OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
-if not OPENAI_API_KEY:
-    st.warning("You must set your OPENAI_API_KEY for chat.")
-else:
-    llm = ChatOpenAI(model_name="gpt-4.1", openai_api_key=OPENAI_API_KEY)
-    tools = [
-        Tool(
-            name="json_search",
-            func=json_search_tool,
-            description="Find any value (name, product, number, etc) across all loaded JSON files. Input is what the user wants to find (e.g. 'iphone', 'apps installed', or 'alice')."
-        )
-    ]
-    agent = initialize_agent(
-        tools=tools,
-        llm=llm,
-        agent="chat-conversational-react-description",
-        verbose=False
     )
-    for msg in st.session_state.chat_history:
-        if msg["role"] == "user":
-            st.markdown(f"<div style='color: #4F8BF9;'><b>User:</b> {msg['content']}</div>", unsafe_allow_html=True)
-        else:
-            st.markdown(f"<div style='color: #1C6E4C;'><b>Agent:</b> {msg['content']}</div>", unsafe_allow_html=True)
-    def send_chat():
         user_input = st.session_state.temp_input
         if user_input.strip():
-            st.session_state.chat_history.append({"role": "user", "content": user_input})
-            agent_reply = agent.run(user_input)
-            st.session_state.chat_history.append({"role": "assistant", "content": agent_reply})
-            st.session_state.temp_input = ""
-    if st.session_state.json_data:
-        st.text_input("Your message:", key="temp_input", on_change=send_chat)
-    else:
-        st.info("Please upload at least one JSON file to start chatting.")

+import difflib
+import html
 import json
+import os
+import traceback
+import requests
 import streamlit as st
+# --- SESSION STATE ---
+# Seed each session key this script reads, but only when it is absent, so a
+# value that is already stored is never overwritten.
+for _state_key, _make_default in (
+    ("json_data", dict),
+    ("messages", list),
+    ("files_loaded", bool),
+    ("temp_input", str),
+):
+    if _state_key not in st.session_state:
+        st.session_state[_state_key] = _make_default()
+st.set_page_config(page_title="Chat with Your JSONs", layout="wide")
+st.title("Chat with Your JSON Files (OpenAI function-calling, No LangChain)")
+# --- UPLOAD FILES ---
+uploaded_files = st.sidebar.file_uploader(
+    "Choose one or more JSON files", type="json", accept_multiple_files=True
+)
+# Fingerprint the current selection (name + size per file) so that a changed
+# selection -- a file added or removed while others stay -- triggers a reload,
+# not only the very first upload.
+_upload_signature = tuple((f.name, f.size) for f in uploaded_files or [])
+if not uploaded_files:
+    st.session_state.json_data.clear()
+    st.session_state.files_loaded = False
+    st.session_state.loaded_signature = ()
+elif (
+    not st.session_state.files_loaded
+    or st.session_state.get("loaded_signature") != _upload_signature
+):
+    st.session_state.json_data.clear()
+    for f in uploaded_files:
+        try:
+            # Rewind first: the buffer may already have been consumed.
+            f.seek(0)
+            st.session_state.json_data[f.name] = json.load(f)
+            st.sidebar.success(f"Loaded: {f.name}")
+        except Exception as e:
+            # Best-effort per file: report the failure and keep loading the rest.
+            st.sidebar.error(f"Error reading {f.name}: {e}")
+    st.session_state.files_loaded = True
+    st.session_state.loaded_signature = _upload_signature
+    # The data set changed, so the previous conversation no longer applies.
+    st.session_state.messages = []
+# --- NORMALIZE ---
 def normalize(s):
+    """Return ``str(s)`` lower-cased with separators collapsed to single spaces.
+    Underscores, hyphens and dots become spaces; runs of whitespace are then
+    squeezed to one space and leading/trailing whitespace is dropped.
+    """
+    text = str(s).lower()
+    for separator in ("_", "-", "."):
+        text = text.replace(separator, " ")
+    words = text.split()
+    return " ".join(words)
 def is_fuzzy_match(a, b, threshold=0.7):
+    """Return True when strings *a* and *b* are close enough to count as a match.
+    Two non-empty strings match when one contains the other, or when their
+    ``difflib.SequenceMatcher`` ratio is at least *threshold*. An empty string
+    only matches another empty string.
+    """
+    # "" is a substring of every string, so without this guard an empty query
+    # (or an empty field) would match everything.
+    if not a or not b:
+        return a == b
+    # Cheap containment test first; the ratio is only computed when needed.
+    if a in b or b in a:
+        return True
+    return difflib.SequenceMatcher(None, a, b).ratio() >= threshold
+# --- FLEXIBLE SEARCH (ALL VALUES, ALL TYPES) ---
+def search_all_jsons(key, value):
+    """Find fields named *key* whose scalar value fuzzily matches *value*.
+    Every document in ``st.session_state.json_data`` is walked recursively
+    through dicts and lists. Key names are compared after ``normalize``;
+    values of type str/int/float/bool are compared with ``is_fuzzy_match``
+    on their normalized form.
+    Returns a list of dicts with "file", "key", "value" and "record" (the dict
+    that directly contains the matching field).
+    """
+    # Both needles are loop-invariant: normalize them once, not per visited key.
+    key_norm = normalize(key)
+    value_norm = normalize(value)
+    matches = []
+    def recursive_search(obj, file_name):
+        if isinstance(obj, dict):
+            for k, v in obj.items():
+                if (
+                    normalize(k) == key_norm
+                    and isinstance(v, (str, int, float, bool))
+                    and is_fuzzy_match(value_norm, normalize(v))
+                ):
+                    matches.append({
+                        "file": file_name,
+                        "key": k,
+                        "value": v,
+                        "record": obj
+                    })
+                # Always descend: nested containers may hold further matches.
+                recursive_search(v, file_name)
+        elif isinstance(obj, list):
+            for item in obj:
+                recursive_search(item, file_name)
+    for file_name, data in st.session_state.json_data.items():
+        recursive_search(data, file_name)
+    return matches
+def fuzzy_value_search(value):
+    """Find every scalar field, in any uploaded document, matching *value*.
+    All documents in ``st.session_state.json_data`` are walked depth-first.
+    A field matches when its value is a str/int/float/bool and
+    ``is_fuzzy_match`` accepts the normalized query against the normalized
+    field value.
+    Returns a list of dicts with "file", "key", "value" and "record" (the dict
+    that directly contains the matching field).
+    """
+    needle = normalize(value)
+    scalar_types = (str, int, float, bool)
+    def iter_fields(node):
+        # Yield (key, value, owning dict) for each dict entry, emitting an
+        # entry before descending into its value.
+        if isinstance(node, dict):
+            for field, field_value in node.items():
+                yield field, field_value, node
+                yield from iter_fields(field_value)
+        elif isinstance(node, list):
+            for element in node:
+                yield from iter_fields(element)
+    return [
+        {"file": source_name, "key": field, "value": field_value, "record": owner}
+        for source_name, document in st.session_state.json_data.items()
+        for field, field_value, owner in iter_fields(document)
+        if isinstance(field_value, scalar_types)
+        and is_fuzzy_match(needle, normalize(field_value))
+    ]
+def list_keys(file_name):
+    """Return the top-level keys of the uploaded document *file_name*.
+    For a dict document its own keys are returned; for a non-empty list whose
+    first element is a dict, that first element's keys are returned; any other
+    shape yields an empty list. If anything raises (for example an unknown
+    file name), ``{"error": <message>}`` is returned instead.
+    """
+    try:
+        document = st.session_state.json_data[file_name]
+        if isinstance(document, dict):
+            key_source = document
+        elif isinstance(document, list) and document and isinstance(document[0], dict):
+            key_source = document[0]
+        else:
+            key_source = {}
+        return list(key_source)
+    except Exception as exc:
+        return {"error": str(exc)}
+def count_key_occurrences(file_name, key):
+    """Count how many dict keys in document *file_name* match *key*.
+    The document is walked recursively through dicts and lists; a key counts
+    when its ``normalize``-d form equals the normalized *key*.
+    Returns the count as an int, or ``{"error": <message>}`` if anything
+    raises (for example an unknown file name).
+    """
+    try:
+        data = st.session_state.json_data[file_name]
+        # Loop-invariant: normalize the wanted key once, not per visited key.
+        key_norm = normalize(key)
+        def count_in(obj):
+            if isinstance(obj, dict):
+                return sum(
+                    (normalize(k) == key_norm) + count_in(v)
+                    for k, v in obj.items()
+                )
+            if isinstance(obj, list):
+                return sum(count_in(item) for item in obj)
+            return 0
+        return count_in(data)
+    except Exception as e:
+        return {"error": str(e)}
+# --- FUNCTION SCHEMA for OpenAI ---
+def _string_params_spec(name, description, *param_names):
+    """Build one function spec whose parameters are all required strings."""
+    return {
+        "name": name,
+        "description": description,
+        "parameters": {
+            "type": "object",
+            "properties": {param: {"type": "string"} for param in param_names},
+            "required": list(param_names),
+        },
+    }
+# Functions advertised to the model; the names must match the dispatch in
+# send_message.
+function_schema = [
+    _string_params_spec(
+        "search_all_jsons",
+        "Recursively search all uploaded JSONs for all records where a key matches a value (fuzzy, any type).",
+        "key",
+        "value",
+    ),
+    _string_params_spec(
+        "fuzzy_value_search",
+        "Search all uploaded JSONs for any record with a field value matching (fuzzy, all types).",
+        "value",
+    ),
+    _string_params_spec(
+        "list_keys",
+        "List top-level keys in a given JSON file.",
+        "file_name",
+    ),
+    _string_params_spec(
+        "count_key_occurrences",
+        "Count number of times a key appears in a file.",
+        "file_name",
+        "key",
+    ),
+]
+# --- SYSTEM PROMPT: Set expectations for OpenAI! ---
+# Instructions sent as the first message of every request; the sentences are
+# joined with single spaces into one prompt string.
+_SYSTEM_PROMPT_SENTENCES = (
+    "You are a JSON data assistant. Use the functions provided to answer the user's question.",
+    "If the user's query does not mention a key, use 'fuzzy_value_search' to match on any value.",
+    "If a key is mentioned (like 'apps_installed'), use 'search_all_jsons' for that key and the value.",
+    "You may use 'list_keys' to help discover the file structure if needed.",
+    "Always give a direct answer from the data if possible.",
+)
+system_message = {
+    "role": "system",
+    "content": " ".join(_SYSTEM_PROMPT_SENTENCES),
+}
+# --- CHAT UI (with OpenAI function-calling!) ---
+st.markdown("### Ask any question about your data, just like ChatGPT.")
+for msg in st.session_state.messages:
+    # Everything below is rendered with unsafe_allow_html=True, so user text,
+    # model text and JSON-derived function output must be HTML-escaped or a
+    # stray "<" / "</pre>" would break the page or inject markup.
+    role = msg["role"]
+    safe_content = html.escape(str(msg.get("content")))
+    if role == "user":
+        st.markdown(f"<div style='color: #4F8BF9;'><b>User:</b> {safe_content}</div>", unsafe_allow_html=True)
+    elif role == "assistant":
+        st.markdown(f"<div style='color: #1C6E4C;'><b>Agent:</b> {safe_content}</div>", unsafe_allow_html=True)
+    elif role == "function":
+        safe_name = html.escape(str(msg["name"]))
+        st.markdown(f"<details><summary><b>Function '{safe_name}' output:</b></summary><pre>{safe_content}</pre></details>", unsafe_allow_html=True)
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+if not OPENAI_API_KEY:
+    # Without a key every request would be sent as "Bearer None" and fail with
+    # an opaque HTTP error; tell the user up front instead.
+    st.warning("You must set your OPENAI_API_KEY for chat.")
+# Headers shared by every OpenAI request made from send_message.
+HEADERS = {
+    "Authorization": f"Bearer {OPENAI_API_KEY}",
+    "Content-Type": "application/json",
+}
+def _chat_completion(messages, with_functions=False):
+    """POST *messages* to the chat completions endpoint and return the reply message.
+    When *with_functions* is true the request advertises ``function_schema``
+    and lets the model decide whether to call one. Raises
+    ``requests.HTTPError`` on a non-2xx response.
+    """
+    payload = {
+        "model": "gpt-4.1",
+        "messages": messages,
+        "temperature": 0,
+        "max_tokens": 1200,
+    }
+    if with_functions:
+        payload["functions"] = function_schema
+        payload["function_call"] = "auto"
+    resp = requests.post(
+        "https://api.openai.com/v1/chat/completions",
+        headers=HEADERS,
+        json=payload,
+        timeout=60,
+    )
+    resp.raise_for_status()
+    return resp.json()["choices"][0]["message"]
+def _run_function_call(func_name, args):
+    """Run the local function the model asked for and return its result."""
+    handlers = {
+        "search_all_jsons": lambda: search_all_jsons(args.get("key"), args.get("value")),
+        "fuzzy_value_search": lambda: fuzzy_value_search(args.get("value")),
+        "list_keys": lambda: list_keys(args.get("file_name")),
+        "count_key_occurrences": lambda: count_key_occurrences(args.get("file_name"), args.get("key")),
+    }
+    handler = handlers.get(func_name)
+    if handler is None:
+        return {"error": f"Unknown function: {func_name}"}
+    return handler()
+def send_message():
+    """Handle a submitted chat message (``on_change`` callback of the text input).
+    Appends the user's text to ``st.session_state.messages``, asks the model
+    for a reply, runs at most one requested function and lets the model phrase
+    the final answer from its output, then clears the input box. Any exception
+    is shown in the UI with its traceback instead of propagating.
+    """
+    try:
         user_input = st.session_state.temp_input
         if user_input.strip():
+            st.session_state.messages.append({"role": "user", "content": user_input})
+            # Only the most recent turns are sent, to bound the prompt size.
+            chat_messages = [system_message] + st.session_state.messages[-10:]
+            msg = _chat_completion(chat_messages, with_functions=True)
+            call = msg.get("function_call")
+            if call:
+                func_name = call["name"]
+                args_json = call.get("arguments") or "{}"
+                args = json.loads(args_json)
+                if not isinstance(args, dict):
+                    # Valid JSON that is not an object cannot carry named arguments.
+                    args = {}
+                result = _run_function_call(func_name, args)
+                result_json = json.dumps(result, indent=2)
+                st.session_state.messages.append({
+                    "role": "function",
+                    "name": func_name,
+                    "content": result_json,
+                })
+                # Let the LLM observe the function output and craft the final
+                # answer. The assistant's own function_call turn must precede
+                # the function result so the model sees what it asked for.
+                followup_messages = chat_messages + [
+                    {
+                        "role": "assistant",
+                        "content": msg.get("content"),
+                        "function_call": {"name": func_name, "arguments": args_json},
+                    },
+                    {"role": "function", "name": func_name, "content": result_json},
+                ]
+                final_msg = _chat_completion(followup_messages)
+                answer = final_msg.get("content") or ""
+                st.session_state.messages.append({"role": "assistant", "content": answer})
+            else:
+                st.session_state.messages.append({"role": "assistant", "content": msg["content"]})
+        st.session_state.temp_input = ""
+    except Exception as e:
+        # UI boundary: surface the failure to the user rather than crash the run.
+        st.error("Exception: " + str(e))
+        st.code(traceback.format_exc())
+# The chat box is only offered once at least one JSON document is loaded.
+if not st.session_state.json_data:
+    st.info("Please upload at least one JSON file to start chatting.")
+else:
+    st.text_input("Your message:", key="temp_input", on_change=send_message)