Seth0330 committed on
Commit
657f503
·
verified ·
1 Parent(s): 4e778f2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -108
app.py CHANGED
@@ -1,13 +1,14 @@
1
  import streamlit as st
2
  import json
3
- import difflib
4
- import re
5
 
6
  def normalize(s):
7
  return ' '.join(str(s).lower().replace("_", " ").replace("-", " ").replace(".", " ").split())
8
 
9
  def is_fuzzy_match(a, b, threshold=0.7):
10
- ratio = difflib.SequenceMatcher(None, a, b).ratio()
 
11
  return ratio >= threshold or a in b or b in a
12
 
13
  def recursive_fuzzy_value_search(target_value):
@@ -17,26 +18,13 @@ def recursive_fuzzy_value_search(target_value):
17
  def _search(obj, path):
18
  if isinstance(obj, dict):
19
  for k, v in obj.items():
20
- # Match ANY primitive value (str, int, float, bool)
21
  if isinstance(v, (str, int, float, bool)) and is_fuzzy_match(norm_target, normalize(v)):
22
  matches.append({
23
- "match_path": path + [k],
24
- "matched_value": v,
25
  "key": k,
26
- "record": obj,
27
- "file": file_name
28
  })
29
- # Check inside nested dicts
30
- if isinstance(v, dict):
31
- for nk, nv in v.items():
32
- if isinstance(nv, (str, int, float, bool)) and is_fuzzy_match(norm_target, normalize(nv)):
33
- matches.append({
34
- "match_path": path + [k, nk],
35
- "matched_value": nv,
36
- "key": nk,
37
- "record": v,
38
- "file": file_name
39
- })
40
  _search(v, path + [k])
41
  elif isinstance(obj, list):
42
  for idx, item in enumerate(obj):
@@ -44,105 +32,72 @@ def recursive_fuzzy_value_search(target_value):
44
  _search(data, [])
45
  return matches
46
 
47
def show_all_strings():
    """List every primitive value stored under a dict key in the uploaded JSON.

    Walks each document in ``st.session_state.json_data`` and returns one
    ``"<file> | <dotted.path> = <value>"`` string per ``str``/``int``/
    ``float``/``bool`` value that sits directly under a dict key. List
    positions show up in the path as ``[idx]``.

    Each value is reported exactly once. Descent into nested dicts happens
    only through the recursion; an extra inline pass over nested dicts would
    report those values a second time.
    """
    primitives = (str, int, float, bool)
    found = []

    def _collect(file_name, obj, path):
        # Depth-first walk that appends formatted entries to ``found``.
        if isinstance(obj, dict):
            for k, v in obj.items():
                if isinstance(v, primitives):
                    found.append(f"{file_name} | {'.'.join(path + [k])} = {v}")
                else:
                    _collect(file_name, v, path + [k])
        elif isinstance(obj, list):
            for idx, item in enumerate(obj):
                _collect(file_name, item, path + [f"[{idx}]"])

    for file_name, data in st.session_state.json_data.items():
        _collect(file_name, data, [])
    return found
65
-
66
def handle_user_query(query):
    """Extract a search term from ``query`` and format the fuzzy-search hits.

    The term is the first capture group of the first question pattern that
    matches; if that yields nothing, the first run of letters, digits,
    spaces and ``_ - . @`` in the query is used instead. Returns one
    markdown line per hit, a "No records found" message when the search is
    empty, or a hint when no term could be extracted at all.
    """
    question_patterns = (
        r"(?:last\s*login.*?for|when\s+did)\s+([a-zA-Z0-9 _\-\.@]+)",
        r"when\s+was\s+([a-zA-Z0-9 _\-\.@]+)\s+last\s+(?:login|logged\s*in)",
        r"last\s*login\s*of\s+([a-zA-Z0-9 _\-\.@]+)",
        r"(?:info|details|record) for\s+([a-zA-Z0-9 _\-\.@]+)",
    )
    # Lazily try each pattern and stop at the first one that matches.
    attempts = (re.search(pattern, query, re.IGNORECASE) for pattern in question_patterns)
    first_hit = next((attempt for attempt in attempts if attempt), None)
    search_term = first_hit.group(1).strip() if first_hit else None

    if not search_term:
        # Fallback: any word/phrase of 1+ char (letters, digits, spaces, dashes, underscores, dots)
        loose_hit = re.search(r"([A-Za-z0-9][A-Za-z0-9 _\-\.@]*)", query)
        if loose_hit:
            search_term = loose_hit.group(1).strip()

    if not search_term:
        return "No valid search value detected. Try a person's name, number, product, device, etc."

    hits = recursive_fuzzy_value_search(search_term)
    if not hits:
        return f"No records found for '{search_term}' in any file."
    return "\n\n".join(
        f"**{res['matched_value']}** (in file `{res['file']}` | key: `{res['key']}` | path: `{'.'.join(res['match_path'])}`)"
        for res in hits
    )
96
 
97
- # --- Streamlit UI setup ---
98
  if "json_data" not in st.session_state:
99
  st.session_state.json_data = {}
100
- if "messages" not in st.session_state:
101
- st.session_state.messages = []
102
- if "temp_input" not in st.session_state:
103
- st.session_state.temp_input = ""
104
- if "files_loaded" not in st.session_state:
105
- st.session_state.files_loaded = False
106
 
107
- st.set_page_config(page_title="Flexible JSON Fuzzy Search", layout="wide")
108
- st.title("Instant JSON-Backed Q&A (Flexible Fuzzy Search All Keys & Types!)")
109
 
110
  uploaded_files = st.sidebar.file_uploader(
111
  "Choose one or more JSON files", type="json", accept_multiple_files=True
112
  )
113
- if uploaded_files and not st.session_state.files_loaded:
114
  st.session_state.json_data.clear()
115
  for f in uploaded_files:
116
- try:
117
- content = json.load(f)
118
- st.session_state.json_data[f.name] = content
119
- st.sidebar.success(f"Loaded: {f.name}")
120
- except Exception as e:
121
- st.sidebar.error(f"Error reading {f.name}: {e}")
122
- st.session_state.messages = []
123
- st.session_state.files_loaded = True
124
- elif not uploaded_files:
125
- st.session_state.json_data.clear()
126
- st.session_state.files_loaded = False
127
-
128
import html

st.markdown("### Ask about ANY value (name, product, number, device, etc) — partials/typos/substring OK!")

# Replay the conversation. Message text comes from the user and from the
# uploaded files, so it is HTML-escaped before being placed inside markup
# rendered with unsafe_allow_html.
for msg in st.session_state.messages:
    safe_content = html.escape(str(msg["content"]))
    if msg["role"] == "user":
        st.markdown(f"<div style='color: #4F8BF9;'><b>User:</b> {safe_content}</div>", unsafe_allow_html=True)
    else:
        st.markdown(f"<div style='color: #1C6E4C;'><b>Agent:</b> {safe_content}</div>", unsafe_allow_html=True)


def send_message():
    """Text-input callback: record the user's message and the search answer.

    Reads the pending text from ``st.session_state.temp_input``, appends it
    and the result of ``handle_user_query`` to ``st.session_state.messages``,
    then clears the input box. Blank input is ignored.
    """
    user_input = st.session_state.temp_input
    if user_input.strip():
        st.session_state.messages.append({"role": "user", "content": user_input})
        answer = handle_user_query(user_input)
        st.session_state.messages.append({"role": "assistant", "content": answer})
        st.session_state.temp_input = ""


if st.session_state.json_data:
    st.text_input("Your message:", key="temp_input", on_change=send_message)
    if st.button("Show all values in uploaded JSONs"):
        st.write(show_all_strings())
else:
    st.info("Please upload at least one JSON file to start chatting.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  import json
3
+ from langchain.chat_models import ChatOpenAI
4
+ from langchain.agents import initialize_agent, Tool
5
 
6
  def normalize(s):
7
  return ' '.join(str(s).lower().replace("_", " ").replace("-", " ").replace(".", " ").split())
8
 
9
  def is_fuzzy_match(a, b, threshold=0.7):
10
+ from difflib import SequenceMatcher
11
+ ratio = SequenceMatcher(None, a, b).ratio()
12
  return ratio >= threshold or a in b or b in a
13
 
14
  def recursive_fuzzy_value_search(target_value):
 
18
  def _search(obj, path):
19
  if isinstance(obj, dict):
20
  for k, v in obj.items():
 
21
  if isinstance(v, (str, int, float, bool)) and is_fuzzy_match(norm_target, normalize(v)):
22
  matches.append({
23
+ "file": file_name,
 
24
  "key": k,
25
+ "path": ".".join(path + [k]),
26
+ "value": v
27
  })
 
 
 
 
 
 
 
 
 
 
 
28
  _search(v, path + [k])
29
  elif isinstance(obj, list):
30
  for idx, item in enumerate(obj):
 
32
  _search(data, [])
33
  return matches
34
 
35
+ # LangChain Tool for LLM
36
def json_search_tool(query: str) -> str:
    """Look up ``query`` in the loaded JSON files and report the hits as text.

    Returns one ``file | key (path): value`` line per match produced by
    ``recursive_fuzzy_value_search``, or a "No match" message when that
    search comes back empty.
    """
    hits = recursive_fuzzy_value_search(query)
    if hits:
        return "\n".join(
            f"{hit['file']} | {hit['key']} ({hit['path']}): {hit['value']}"
            for hit in hits
        )
    return f"No match for '{query}'."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
+ # Streamlit UI
47
  if "json_data" not in st.session_state:
48
  st.session_state.json_data = {}
49
+ if "chat_history" not in st.session_state:
50
+ st.session_state.chat_history = []
 
 
 
 
51
 
52
+ st.set_page_config(page_title="Chat with Your JSONs!", layout="wide")
53
+ st.title("Chat with Your JSON Files (powered by GPT + instant JSON search)")
54
 
55
  uploaded_files = st.sidebar.file_uploader(
56
  "Choose one or more JSON files", type="json", accept_multiple_files=True
57
  )
58
+ if uploaded_files:
59
  st.session_state.json_data.clear()
60
  for f in uploaded_files:
61
+ content = json.load(f)
62
+ st.session_state.json_data[f.name] = content
63
+ st.sidebar.success("All JSON files loaded.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
65
import html
import os

from langchain.memory import ConversationBufferMemory

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    st.warning("You must set your OPENAI_API_KEY for chat.")
else:
    # The chat-conversational agent's prompt takes a ``chat_history`` input in
    # addition to the user's message. Attaching memory supplies it, so
    # ``agent.run(text)`` can be called with a single string. The memory
    # lives in session state so the conversation survives Streamlit reruns.
    if "agent_memory" not in st.session_state:
        st.session_state.agent_memory = ConversationBufferMemory(
            memory_key="chat_history", return_messages=True
        )

    llm = ChatOpenAI(model_name="gpt-4.1", openai_api_key=OPENAI_API_KEY)
    tools = [
        Tool(
            name="json_search",
            func=json_search_tool,
            description="Find any value (name, product, number, etc) across all loaded JSON files. Input is what the user wants to find (e.g. 'iphone', 'apps installed', or 'alice').",
        )
    ]
    agent = initialize_agent(
        tools=tools,
        llm=llm,
        agent="chat-conversational-react-description",
        verbose=False,
        memory=st.session_state.agent_memory,
    )

    # Replay the conversation. Message text comes from the user and the
    # model, so it is HTML-escaped before being placed inside markup rendered
    # with unsafe_allow_html.
    for msg in st.session_state.chat_history:
        safe_content = html.escape(str(msg["content"]))
        if msg["role"] == "user":
            st.markdown(f"<div style='color: #4F8BF9;'><b>User:</b> {safe_content}</div>", unsafe_allow_html=True)
        else:
            st.markdown(f"<div style='color: #1C6E4C;'><b>Agent:</b> {safe_content}</div>", unsafe_allow_html=True)

    def send_chat():
        """Text-input callback: send the pending message to the agent.

        Reads ``st.session_state.temp_input``, appends the user's message
        and the agent's reply to ``st.session_state.chat_history``, then
        clears the input box. Blank input is ignored.
        """
        user_input = st.session_state.temp_input
        if user_input.strip():
            st.session_state.chat_history.append({"role": "user", "content": user_input})
            try:
                agent_reply = agent.run(user_input)
            except Exception as exc:
                # UI boundary: a failed LLM or tool call should appear in the
                # chat rather than abort the callback half-way through.
                agent_reply = f"Sorry, the request failed: {exc}"
            st.session_state.chat_history.append({"role": "assistant", "content": agent_reply})
            st.session_state.temp_input = ""

    if st.session_state.json_data:
        st.text_input("Your message:", key="temp_input", on_change=send_chat)
    else:
        st.info("Please upload at least one JSON file to start chatting.")