Seth0330 committed on
Commit
50ba7ba
·
verified ·
1 Parent(s): 65ce454

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -55
app.py CHANGED
@@ -3,54 +3,41 @@ import json
3
  import difflib
4
  import re
5
 
6
- # --- Flexible/fuzzy search utilities ---
7
- COMMON_NAME_KEYS = ["user", "username", "name", "fullName", "firstName", "lastName", "customer"]
8
- LOGIN_KEYS = ["lastLogin", "login", "loggedIn", "lastLoggedIn", "last_login", "last_logged_in"]
9
-
10
  def normalize(s):
11
- # Lowercase, replace separators with spaces, strip, collapse whitespace
12
  return ' '.join(str(s).lower().replace("_", " ").replace("-", " ").replace(".", " ").split())
13
 
14
  def is_fuzzy_match(a, b, threshold=0.7):
15
- # Both a and b should be normalized strings
16
  ratio = difflib.SequenceMatcher(None, a, b).ratio()
17
  return ratio >= threshold or a in b or b in a
18
 
19
- # --- Fuzzy, nested user search ---
20
- def recursive_find_user(target_name):
21
  matches = []
22
- norm_target = normalize(target_name)
23
  for file_name, data in st.session_state.json_data.items():
24
  def _search(obj, path):
25
  if isinstance(obj, dict):
26
  for k, v in obj.items():
27
- # Direct match for string value (fuzzy/partial)
28
- if k in COMMON_NAME_KEYS and isinstance(v, str) and is_fuzzy_match(norm_target, normalize(v)):
29
- login_info = {}
30
- for lk in LOGIN_KEYS:
31
- if lk in obj:
32
- login_info[lk] = obj[lk]
33
  matches.append({
34
  "match_path": path + [k],
35
- "matched_name": v,
 
36
  "record": obj,
37
- "file": file_name,
38
- "login_info": login_info
39
  })
40
- # Nested dict (fuzzy/partial)
41
- if k in COMMON_NAME_KEYS and isinstance(v, dict):
42
  for nk, nv in v.items():
43
- if nk in COMMON_NAME_KEYS and is_fuzzy_match(norm_target, normalize(nv)):
44
- login_info = {}
45
- for lk in LOGIN_KEYS:
46
- if lk in obj:
47
- login_info[lk] = obj[lk]
48
  matches.append({
49
  "match_path": path + [k, nk],
50
- "matched_name": nv,
51
- "record": obj,
52
- "file": file_name,
53
- "login_info": login_info
54
  })
55
  _search(v, path + [k])
56
  elif isinstance(obj, list):
@@ -59,20 +46,19 @@ def recursive_find_user(target_name):
59
  _search(data, [])
60
  return matches
61
 
62
- # --- Show all user keys/values (for debug) ---
63
- def show_all_users():
64
  found = []
65
  for file_name, data in st.session_state.json_data.items():
66
  def recursive(obj, path):
67
  if isinstance(obj, dict):
68
  for k, v in obj.items():
69
- if k in COMMON_NAME_KEYS:
70
- if isinstance(v, str):
71
- found.append(f"{file_name} | {'.'.join(path + [k])} = {v}")
72
- elif isinstance(v, dict):
73
- for nk, nv in v.items():
74
- if nk in COMMON_NAME_KEYS:
75
- found.append(f"{file_name} | {'.'.join(path + [k, nk])} = {nv}")
76
  recursive(v, path + [k])
77
  elif isinstance(obj, list):
78
  for idx, item in enumerate(obj):
@@ -80,38 +66,38 @@ def show_all_users():
80
  recursive(data, [])
81
  return found
82
 
83
- # --- User query handler ---
84
  def handle_user_query(query):
 
85
  patterns = [
86
  r"(?:last\s*login.*?for|when\s+did)\s+([a-zA-Z0-9 _\-\.@]+)",
87
  r"when\s+was\s+([a-zA-Z0-9 _\-\.@]+)\s+last\s+(?:login|logged\s*in)",
88
  r"last\s*login\s*of\s+([a-zA-Z0-9 _\-\.@]+)",
89
  r"(?:info|details|record) for\s+([a-zA-Z0-9 _\-\.@]+)"
90
  ]
91
- found_name = None
92
  for pat in patterns:
93
  m = re.search(pat, query, re.IGNORECASE)
94
  if m:
95
- found_name = m.group(1).strip()
96
  break
97
- if not found_name:
98
- # Fallback: look for any word with at least 3 letters (handles very short names too)
99
  m = re.search(r"([A-Za-z0-9][A-Za-z0-9 _\-\.@]{2,})", query)
100
  if m:
101
- found_name = m.group(1).strip()
102
- if found_name:
103
- results = recursive_find_user(found_name)
104
  if not results:
105
- return f"No records found for '{found_name}' in any file."
106
  answers = []
107
  for res in results:
108
- login = ", ".join([f"{k}: {v}" for k, v in res["login_info"].items()]) if res["login_info"] else "No login info found"
109
  answers.append(
110
- f"**{res['matched_name']}** (in file `{res['file']}`) {login}"
111
  )
112
  return "\n\n".join(answers)
113
  else:
114
- return "Sorry, I can only answer direct user info queries (e.g., 'When did Bob the Builder last login?')."
115
 
116
  # --- Streamlit UI setup ---
117
  if "json_data" not in st.session_state:
@@ -123,8 +109,8 @@ if "temp_input" not in st.session_state:
123
  if "files_loaded" not in st.session_state:
124
  st.session_state.files_loaded = False
125
 
126
- st.set_page_config(page_title="Flexible JSON User Q&A", layout="wide")
127
- st.title("Instant JSON-Backed AI Q&A (Fuzzy Flexible Search!)")
128
 
129
  uploaded_files = st.sidebar.file_uploader(
130
  "Choose one or more JSON files", type="json", accept_multiple_files=True
@@ -144,7 +130,7 @@ elif not uploaded_files:
144
  st.session_state.json_data.clear()
145
  st.session_state.files_loaded = False
146
 
147
- st.markdown("### Ask about any user (partial/typo/fuzzy OK) — try: `bob`, `bob the builder`, `astrofan`, `alice`, `johnny.appleseed`")
148
  for msg in st.session_state.messages:
149
  if msg["role"] == "user":
150
  st.markdown(f"<div style='color: #4F8BF9;'><b>User:</b> {msg['content']}</div>", unsafe_allow_html=True)
@@ -161,7 +147,7 @@ def send_message():
161
 
162
  if st.session_state.json_data:
163
  st.text_input("Your message:", key="temp_input", on_change=send_message)
164
- if st.button("Show all users in uploaded JSONs"):
165
- st.write(show_all_users())
166
  else:
167
  st.info("Please upload at least one JSON file to start chatting.")
 
3
  import difflib
4
  import re
5
 
6
+ # --- Fuzzy search utilities ---
 
 
 
7
  def normalize(s):
 
8
  return ' '.join(str(s).lower().replace("_", " ").replace("-", " ").replace(".", " ").split())
9
 
10
  def is_fuzzy_match(a, b, threshold=0.7):
 
11
  ratio = difflib.SequenceMatcher(None, a, b).ratio()
12
  return ratio >= threshold or a in b or b in a
13
 
14
+ # --- Fuzzy, global string value search (all keys/fields) ---
15
+ def recursive_fuzzy_value_search(target_value):
16
  matches = []
17
+ norm_target = normalize(target_value)
18
  for file_name, data in st.session_state.json_data.items():
19
  def _search(obj, path):
20
  if isinstance(obj, dict):
21
  for k, v in obj.items():
22
+ # Match ANY string value (not just specific keys)
23
+ if isinstance(v, str) and is_fuzzy_match(norm_target, normalize(v)):
 
 
 
 
24
  matches.append({
25
  "match_path": path + [k],
26
+ "matched_value": v,
27
+ "key": k,
28
  "record": obj,
29
+ "file": file_name
 
30
  })
31
+ # Check inside nested dicts
32
+ if isinstance(v, dict):
33
  for nk, nv in v.items():
34
+ if isinstance(nv, str) and is_fuzzy_match(norm_target, normalize(nv)):
 
 
 
 
35
  matches.append({
36
  "match_path": path + [k, nk],
37
+ "matched_value": nv,
38
+ "key": nk,
39
+ "record": v,
40
+ "file": file_name
41
  })
42
  _search(v, path + [k])
43
  elif isinstance(obj, list):
 
46
  _search(data, [])
47
  return matches
48
 
49
+ # --- Show all string values (for debug) ---
50
+ def show_all_strings():
51
  found = []
52
  for file_name, data in st.session_state.json_data.items():
53
  def recursive(obj, path):
54
  if isinstance(obj, dict):
55
  for k, v in obj.items():
56
+ if isinstance(v, str):
57
+ found.append(f"{file_name} | {'.'.join(path + [k])} = {v}")
58
+ elif isinstance(v, dict):
59
+ for nk, nv in v.items():
60
+ if isinstance(nv, str):
61
+ found.append(f"{file_name} | {'.'.join(path + [k, nk])} = {nv}")
 
62
  recursive(v, path + [k])
63
  elif isinstance(obj, list):
64
  for idx, item in enumerate(obj):
 
66
  recursive(data, [])
67
  return found
68
 
69
+ # --- Query handler (searches for any string value) ---
70
  def handle_user_query(query):
71
+ # Extract likely search string (more flexible, still supports old user-centric queries)
72
  patterns = [
73
  r"(?:last\s*login.*?for|when\s+did)\s+([a-zA-Z0-9 _\-\.@]+)",
74
  r"when\s+was\s+([a-zA-Z0-9 _\-\.@]+)\s+last\s+(?:login|logged\s*in)",
75
  r"last\s*login\s*of\s+([a-zA-Z0-9 _\-\.@]+)",
76
  r"(?:info|details|record) for\s+([a-zA-Z0-9 _\-\.@]+)"
77
  ]
78
+ found_value = None
79
  for pat in patterns:
80
  m = re.search(pat, query, re.IGNORECASE)
81
  if m:
82
+ found_value = m.group(1).strip()
83
  break
84
+ if not found_value:
85
+ # Fallback: any word/phrase of 3+ chars (letters, digits, spaces, dashes, underscores, dots)
86
  m = re.search(r"([A-Za-z0-9][A-Za-z0-9 _\-\.@]{2,})", query)
87
  if m:
88
+ found_value = m.group(1).strip()
89
+ if found_value:
90
+ results = recursive_fuzzy_value_search(found_value)
91
  if not results:
92
+ return f"No records found for '{found_value}' in any file."
93
  answers = []
94
  for res in results:
 
95
  answers.append(
96
+ f"**{res['matched_value']}** (in file `{res['file']}` | key: `{res['key']}` | path: `{'.'.join(res['match_path'])}`)"
97
  )
98
  return "\n\n".join(answers)
99
  else:
100
+ return "No valid search value detected. Try a person's name, product, device, etc."
101
 
102
  # --- Streamlit UI setup ---
103
  if "json_data" not in st.session_state:
 
109
  if "files_loaded" not in st.session_state:
110
  st.session_state.files_loaded = False
111
 
112
+ st.set_page_config(page_title="Flexible JSON Fuzzy Search", layout="wide")
113
+ st.title("Instant JSON-Backed Q&A (Flexible Fuzzy Search — All Keys!)")
114
 
115
  uploaded_files = st.sidebar.file_uploader(
116
  "Choose one or more JSON files", type="json", accept_multiple_files=True
 
130
  st.session_state.json_data.clear()
131
  st.session_state.files_loaded = False
132
 
133
+ st.markdown("### Ask about ANY value (name, product, device, etc) — partials/typos/substring OK!")
134
  for msg in st.session_state.messages:
135
  if msg["role"] == "user":
136
  st.markdown(f"<div style='color: #4F8BF9;'><b>User:</b> {msg['content']}</div>", unsafe_allow_html=True)
 
147
 
148
  if st.session_state.json_data:
149
  st.text_input("Your message:", key="temp_input", on_change=send_message)
150
+ if st.button("Show all strings in uploaded JSONs"):
151
+ st.write(show_all_strings())
152
  else:
153
  st.info("Please upload at least one JSON file to start chatting.")