Multi_Model_AI_AGENT_VectorDB_langchain_json

Sleeping

App Files Community

Seth0330 committed on Jun 10, 2025

Commit

3c83ca9

verified ·

1 Parent(s): 5aa8f9e

Update app.py

Browse files

Files changed (1) hide show

app.py +121 -81

app.py CHANGED Viewed

@@ -1,14 +1,12 @@
 import streamlit as st
-import pandas as pd
 import os
-import requests
 import json
 # --- Page config
-st.set_page_config(page_title="CSV-Backed AI Chat Agent", layout="wide")
-# --- Title & image
-st.title("CSV-Backed AI Chat Agent")
 # --- Load API key
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
@@ -21,102 +19,140 @@ HEADERS = {
     "Content-Type": "application/json",
 }
-# --- Sidebar: CSV upload & preview
-st.sidebar.header("Upload CSV File")
-uploaded_file = st.sidebar.file_uploader("Choose a CSV file", type="csv")
-# --- Conversation memory: Use Streamlit session state
 if "messages" not in st.session_state:
     st.session_state.messages = []
 if "temp_input" not in st.session_state:
     st.session_state.temp_input = ""
-# --- Only load df and reset chat on new file upload
-if uploaded_file is not None:
-    try:
-        df = pd.read_csv(uploaded_file)
-        st.sidebar.success("File uploaded successfully!")
-        st.sidebar.write("Preview of the uploaded file:")
-        st.sidebar.dataframe(df.head())
-        columns = ", ".join(df.columns)
-        system_message = {
-            "role": "system",
-            "content": (
-                f"You are an AI data analyst for a CSV file with these columns: {columns}. "
-                "When the user asks a question, always use the most relevant function to get the answer directly. "
-                "Do not describe your plan or reasoning steps. Do not ask the user for clarification. "
-                "Just call the function needed and give the answer, as briefly as possible. "
-                "If you need to search or filter the CSV, use the 'search_csv' function. "
-                "If you need to count unique values, use the 'count_unique' function. "
-                "If you use 'search_csv', use Pandas query syntax."
-            ),
-        }
-        # Only reset memory on new file load
-        if not st.session_state.messages or (
-            st.session_state.messages and
-            ("system" not in st.session_state.messages[0].get("role", ""))
-        ):
-            st.session_state.messages = [system_message]
-        elif (
-            st.session_state.messages and
-            st.session_state.messages[0].get("role", "") == "system" and
-            st.session_state.messages[0].get("content", "") != system_message["content"]
-        ):
-            st.session_state.messages[0] = system_message
-    except Exception as e:
-        st.sidebar.error(f"Error reading file: {e}")
-        df = None
 else:
-    df = None
-if df is not None:
-    st.markdown(f"**Loaded CSV:** {df.shape[0]} rows × {df.shape[1]} columns")
-# --- Functions for function calling
-def search_csv(query: str):
     try:
-        result_df = df.query(query)
-        return result_df.head(10).to_dict(orient="records")   # limit for safety
     except Exception as e:
-        return {"error": f"Invalid query. Example: 'price > 100'. Details: {str(e)}"}
-def count_unique(column: str):
     try:
-        n = df[column].nunique()
-        return {"column": column, "unique_count": int(n)}
     except Exception as e:
-        return {"error": f"Column '{column}' not found or not countable. Details: {str(e)}"}
 # --- Function schemas for OpenAI
 function_schema = [
     {
-        "name": "search_csv",
-        "description": "Filter the CSV rows by a Pandas query. Example: price > 100",
         "parameters": {
             "type": "object",
             "properties": {
-                "query": {
-                    "type": "string",
-                    "description": "A Pandas query string, e.g. 'price > 100 and city == \"Miami\"'"
-                },
             },
-            "required": ["query"],
         },
     },
     {
-        "name": "count_unique",
-        "description": "Count the number of unique values in a column.",
         "parameters": {
             "type": "object",
             "properties": {
-                "column": {
-                    "type": "string",
-                    "description": "The column name to count unique values."
-                },
             },
-            "required": ["column"],
         },
-    }
 ]
 # --- Chat interface
@@ -138,18 +174,18 @@ def send_message():
     user_input = st.session_state.temp_input
     if user_input and user_input.strip():
         st.session_state.messages.append({"role": "user", "content": user_input})
-        # Limit history for context size (keep system + last 8)
         chat_messages = st.session_state.messages
         if len(chat_messages) > 10:
             chat_messages = [chat_messages[0]] + chat_messages[-9:]
         else:
             chat_messages = chat_messages.copy()
-        # First OpenAI call: Check for function call
         chat_resp = requests.post(
             "https://api.openai.com/v1/chat/completions",
             headers=HEADERS,
             json={
-                "model": "gpt-4.1",
                 "messages": chat_messages,
                 "functions": function_schema,
                 "function_call": "auto",
@@ -167,11 +203,12 @@ def send_message():
             func_name = msg["function_call"]["name"]
             args_json = msg["function_call"]["arguments"]
             args = json.loads(args_json)
-            # --- FIXED: Only pass the expected arg for each function
-            if func_name == "search_csv":
-                function_result = search_csv(args.get("query", ""))
-            elif func_name == "count_unique":
-                function_result = count_unique(args.get("column", ""))
             else:
                 function_result = {"error": f"Unknown function: {func_name}"}
             st.session_state.messages.append({
@@ -179,7 +216,7 @@ def send_message():
                 "name": func_name,
                 "content": json.dumps(function_result),
             })
-            # Limit history again for second call
             followup_messages = st.session_state.messages
             if len(followup_messages) > 12:
                 followup_messages = [followup_messages[0]] + followup_messages[-11:]
@@ -204,5 +241,8 @@ def send_message():
         st.session_state.temp_input = ""
-if df is not None:
     st.text_input("Your message:", key="temp_input", on_change=send_message)

 import streamlit as st
 import os
 import json
+import requests
 # --- Page config
+st.set_page_config(page_title="JSON-Backed AI Chat Agent", layout="wide")
+st.title("JSON-Backed AI Chat Agent")
 # --- Load API key
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
     "Content-Type": "application/json",
 }
+# --- Sidebar: Multiple JSON upload & preview
+st.sidebar.header("Upload Multiple JSON Files")
+uploaded_files = st.sidebar.file_uploader(
+    "Choose one or more JSON files", type="json", accept_multiple_files=True
+)
+# --- Session State for data and chat
+if "json_data" not in st.session_state:
+    st.session_state.json_data = {}
 if "messages" not in st.session_state:
     st.session_state.messages = []
 if "temp_input" not in st.session_state:
     st.session_state.temp_input = ""
+# --- Load all JSON files
+if uploaded_files:
+    st.session_state.json_data.clear()
+    file_summaries = []
+    for f in uploaded_files:
+        try:
+            content = json.load(f)
+            st.session_state.json_data[f.name] = content
+            # For summary in system prompt
+            if isinstance(content, dict):
+                keys = list(content.keys())
+            elif isinstance(content, list) and content and isinstance(content[0], dict):
+                keys = list(content[0].keys())
+            else:
+                keys = []
+            file_summaries.append(f"{f.name}: keys={keys[:10]}{'...' if len(keys)>10 else ''}")
+            st.sidebar.success(f"Loaded: {f.name}")
+            st.sidebar.write(f"Keys: {keys[:10]}{'...' if len(keys)>10 else ''}")
+        except Exception as e:
+            st.sidebar.error(f"Error reading {f.name}: {e}")
+    # Compose system prompt for the LLM
+    system_message = {
+        "role": "system",
+        "content": (
+            "You are an AI data analyst for the following JSON files:\n" +
+            "\n".join(file_summaries) +
+            "\nEach file may have a different structure and set of keys. "
+            "When the user asks a question, identify which file(s) it applies to, "
+            "then use the most relevant function to extract the answer. "
+            "If the user does not specify a file, make your best guess based on keys/fields mentioned."
+        ),
+    }
+    # Reset chat if new files loaded
+    st.session_state.messages = [system_message]
 else:
+    st.session_state.json_data.clear()
+# --- Functions for querying JSON files
+def search_json(file_name, key, value):
+    """Return all records in the given JSON file (list of dicts) where key == value."""
+    try:
+        data = st.session_state.json_data[file_name]
+        if isinstance(data, list):
+            results = [item for item in data if isinstance(item, dict) and item.get(key) == value]
+            return results[:10]
+        elif isinstance(data, dict):
+            if key in data and data[key] == value:
+                return [{key: value}]
+            else:
+                return []
+        else:
+            return []
+    except Exception as e:
+        return {"error": str(e)}
+def list_keys(file_name):
+    """Return all top-level keys of the JSON file."""
     try:
+        data = st.session_state.json_data[file_name]
+        if isinstance(data, dict):
+            return list(data.keys())
+        elif isinstance(data, list) and data and isinstance(data[0], dict):
+            return list(data[0].keys())
+        else:
+            return []
     except Exception as e:
+        return {"error": str(e)}
+def count_key_occurrences(file_name, key):
+    """Count number of occurrences of a given key in the JSON file."""
     try:
+        data = st.session_state.json_data[file_name]
+        if isinstance(data, dict):
+            return 1 if key in data else 0
+        elif isinstance(data, list):
+            return sum(1 for item in data if isinstance(item, dict) and key in item)
+        else:
+            return 0
     except Exception as e:
+        return {"error": str(e)}
 # --- Function schemas for OpenAI
 function_schema = [
     {
+        "name": "search_json",
+        "description": "Find records in the specified JSON file where key matches a given value.",
         "parameters": {
             "type": "object",
             "properties": {
+                "file_name": {"type": "string", "description": "The uploaded JSON file to search."},
+                "key": {"type": "string", "description": "The key/field to filter by."},
+                "value": {"type": "string", "description": "The value to match."}
             },
+            "required": ["file_name", "key", "value"],
         },
     },
     {
+        "name": "list_keys",
+        "description": "List all top-level keys in a given JSON file.",
         "parameters": {
             "type": "object",
             "properties": {
+                "file_name": {"type": "string", "description": "The uploaded JSON file."},
             },
+            "required": ["file_name"],
         },
+    },
+    {
+        "name": "count_key_occurrences",
+        "description": "Count the number of times a given key appears in a JSON file.",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "file_name": {"type": "string", "description": "The uploaded JSON file."},
+                "key": {"type": "string", "description": "The key to count."},
+            },
+            "required": ["file_name", "key"],
+        },
+    },
 ]
 # --- Chat interface
     user_input = st.session_state.temp_input
     if user_input and user_input.strip():
         st.session_state.messages.append({"role": "user", "content": user_input})
+        # Limit history for context size
         chat_messages = st.session_state.messages
         if len(chat_messages) > 10:
             chat_messages = [chat_messages[0]] + chat_messages[-9:]
         else:
             chat_messages = chat_messages.copy()
+        # OpenAI call
         chat_resp = requests.post(
             "https://api.openai.com/v1/chat/completions",
             headers=HEADERS,
             json={
+                "model": "gpt-4.1",  # Use latest available model for this purpose
                 "messages": chat_messages,
                 "functions": function_schema,
                 "function_call": "auto",
             func_name = msg["function_call"]["name"]
             args_json = msg["function_call"]["arguments"]
             args = json.loads(args_json)
+            if func_name == "search_json":
+                function_result = search_json(args.get("file_name"), args.get("key"), args.get("value"))
+            elif func_name == "list_keys":
+                function_result = list_keys(args.get("file_name"))
+            elif func_name == "count_key_occurrences":
+                function_result = count_key_occurrences(args.get("file_name"), args.get("key"))
             else:
                 function_result = {"error": f"Unknown function: {func_name}"}
             st.session_state.messages.append({
                 "name": func_name,
                 "content": json.dumps(function_result),
             })
+            # Second call to OpenAI for the final answer
             followup_messages = st.session_state.messages
             if len(followup_messages) > 12:
                 followup_messages = [followup_messages[0]] + followup_messages[-11:]
         st.session_state.temp_input = ""
+if st.session_state.json_data:
     st.text_input("Your message:", key="temp_input", on_change=send_message)
+else:
+    st.info("Please upload at least one JSON file to start chatting.")