Seth0330 committed on
Commit
f5c02d4
·
verified ·
1 Parent(s): b72bfb1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -3
app.py CHANGED
@@ -174,17 +174,32 @@ def python_fuzzy_match(user_query, top_k=5):
174
  return docs
175
 
176
  def extract_main_entity(question):
 
 
 
 
 
 
 
177
  tokens = re.findall(r"\b([A-Za-z0-9]+)\b", question)
178
- keywords = [t.lower() for t in tokens if t.lower() not in {"how", "much", "did", "spend", "was", "the", "is", "in", "on", "for", "a", "an", "of", "to", "with"}]
179
- return keywords[0] if keywords else None
 
 
 
180
 
181
  def filter_records_by_entity(records, entity):
 
 
182
  matches = []
183
  for doc in records:
184
- if entity and entity in doc.page_content.lower():
 
 
185
  matches.append(doc)
186
  return matches if matches else records
187
 
 
188
  def hybrid_query(user_query, top_k=5):
189
  vector_docs = query_vector_db(user_query, top_k=top_k)
190
  fuzzy_docs = python_fuzzy_match(user_query, top_k=top_k)
 
174
  return docs
175
 
176
  def extract_main_entity(question):
177
+ import re
178
+ quoted = re.findall(r"['\"]([^'\"]+)['\"]", question)
179
+ if quoted:
180
+ return quoted[0].lower()
181
+ email = re.findall(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b", question)
182
+ if email:
183
+ return email[0].lower().split('@')[0]
184
  tokens = re.findall(r"\b([A-Za-z0-9]+)\b", question)
185
+ stopwords = {"how", "much", "did", "spend", "was", "the", "is", "in", "on", "for", "a", "an", "of", "to", "with"}
186
+ keywords = [t.lower() for t in tokens if t.lower() not in stopwords]
187
+ if not keywords:
188
+ return ""
189
+ return max(keywords, key=len)
190
 
191
  def filter_records_by_entity(records, entity):
192
+ if not entity:
193
+ return records
194
  matches = []
195
  for doc in records:
196
+ if entity in doc.page_content.lower():
197
+ matches.append(doc)
198
+ elif any(entity in v.lower() for v in doc.page_content.split(';')):
199
  matches.append(doc)
200
  return matches if matches else records
201
 
202
+
203
  def hybrid_query(user_query, top_k=5):
204
  vector_docs = query_vector_db(user_query, top_k=top_k)
205
  fuzzy_docs = python_fuzzy_match(user_query, top_k=top_k)