Update app.py
Browse files
app.py
CHANGED
|
@@ -174,17 +174,32 @@ def python_fuzzy_match(user_query, top_k=5):
|
|
| 174 |
return docs
|
| 175 |
|
| 176 |
def extract_main_entity(question):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 177 |
tokens = re.findall(r"\b([A-Za-z0-9]+)\b", question)
|
| 178 |
-
|
| 179 |
-
|
|
|
|
|
|
|
|
|
|
| 180 |
|
| 181 |
def filter_records_by_entity(records, entity):
|
|
|
|
|
|
|
| 182 |
matches = []
|
| 183 |
for doc in records:
|
| 184 |
-
if entity
|
|
|
|
|
|
|
| 185 |
matches.append(doc)
|
| 186 |
return matches if matches else records
|
| 187 |
|
|
|
|
| 188 |
def hybrid_query(user_query, top_k=5):
|
| 189 |
vector_docs = query_vector_db(user_query, top_k=top_k)
|
| 190 |
fuzzy_docs = python_fuzzy_match(user_query, top_k=top_k)
|
|
|
|
| 174 |
return docs
|
| 175 |
|
| 176 |
def extract_main_entity(question):
    """Pick the most likely entity name out of a natural-language question.

    Heuristics are tried in order and the first one that hits wins:

    1. Text wrapped in a *matching* pair of single or double quotes.
    2. The local part (the text before ``@``) of the first e-mail address.
    3. The longest alphanumeric token that is not a stopword; on a tie the
       token that appears first in the question is kept.

    Args:
        question: The user's question. ``None`` and ``""`` are accepted.

    Returns:
        The entity in lower case, or ``""`` when nothing usable is found.
    """
    import re

    if not question:
        return ""

    # The opening quote must not follow a word character and the closing
    # quote must not precede one, so apostrophes in possessives and
    # contractions ("John's ... Bob's") are not mistaken for quote marks.
    # The backreference makes the closing quote match the opening one.
    quoted = re.search(r"(?<!\w)(['\"])(.+?)\1(?!\w)", question)
    if quoted:
        return quoted.group(2).lower()

    # The top-level domain is letters only; a "|" inside a character class
    # is a literal pipe, not an alternation.
    email = re.search(
        r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b", question
    )
    if email:
        return email.group(0).lower().split("@")[0]

    stopwords = {
        "how", "much", "did", "spend", "was", "the", "is", "in", "on",
        "for", "a", "an", "of", "to", "with",
    }
    tokens = re.findall(r"\b([A-Za-z0-9]+)\b", question)
    keywords = [t.lower() for t in tokens if t.lower() not in stopwords]

    # max() returns the first of several equally long keywords.
    return max(keywords, key=len, default="")
|
| 190 |
|
| 191 |
def filter_records_by_entity(records, entity):
    """Keep only the records whose text mentions ``entity``.

    The comparison is a case-insensitive substring test against each
    record's ``page_content``.

    Args:
        records: Sequence of objects exposing a string ``page_content``
            attribute.
        entity: Name to look for. ``None``, ``""`` and whitespace-only
            values disable filtering.

    Returns:
        A new list with the matching records, in their original order. If
        ``entity`` is blank or no record matches, ``records`` itself is
        returned unchanged, so callers always get the full candidate set
        back rather than an empty result.
    """
    if not entity:
        return records

    # Normalise the needle the same way as the haystack; a mixed-case or
    # padded entity would otherwise never match the lower-cased content.
    needle = entity.strip().lower()
    if not needle:
        return records

    # One substring test on the whole content is sufficient: every
    # ';'-separated field is itself a substring of that content, so a
    # per-field check can never find anything this test misses.
    matches = [doc for doc in records if needle in doc.page_content.lower()]
    return matches if matches else records
|
| 201 |
|
| 202 |
+
|
| 203 |
def hybrid_query(user_query, top_k=5):
|
| 204 |
vector_docs = query_vector_db(user_query, top_k=top_k)
|
| 205 |
fuzzy_docs = python_fuzzy_match(user_query, top_k=top_k)
|