Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1097,26 +1097,29 @@ def search_knowledge_base(query):
|
|
| 1097 |
# Retrieve the top 3 most relevant documents
|
| 1098 |
retrieved_docs = st.session_state["faiss_db"].similarity_search(query, k=3)
|
| 1099 |
return retrieved_docs
|
| 1100 |
-
|
| 1101 |
-
async def
|
| 1102 |
try:
|
| 1103 |
-
async with session.
|
| 1104 |
if response.status == 200:
|
| 1105 |
-
|
| 1106 |
-
|
|
|
|
|
|
|
| 1107 |
except Exception:
|
| 1108 |
-
return False
|
| 1109 |
|
| 1110 |
# Function to validate a batch of URLs asynchronously
|
| 1111 |
-
async def
|
| 1112 |
async with aiohttp.ClientSession() as session:
|
| 1113 |
-
tasks = [
|
| 1114 |
results = await asyncio.gather(*tasks)
|
| 1115 |
return results
|
| 1116 |
|
| 1117 |
# Function to perform a Google search and validate links
|
| 1118 |
def google_search(query):
|
| 1119 |
try:
|
|
|
|
| 1120 |
search_client = Client(api_key=serper_api_key)
|
| 1121 |
results = search_client.search({"engine": "google", "q": query})
|
| 1122 |
|
|
@@ -1125,22 +1128,20 @@ def google_search(query):
|
|
| 1125 |
urls = [result.get("link") for result in organic_results if result.get("link")]
|
| 1126 |
snippets = [result.get("snippet") for result in organic_results if result.get("link")]
|
| 1127 |
|
| 1128 |
-
# Validate
|
| 1129 |
-
|
| 1130 |
-
|
| 1131 |
-
#
|
| 1132 |
-
valid_results = [
|
| 1133 |
-
|
| 1134 |
-
|
| 1135 |
-
|
| 1136 |
-
|
| 1137 |
-
|
| 1138 |
return valid_results
|
| 1139 |
|
| 1140 |
except Exception as e:
|
| 1141 |
logger.error(f"Error in Google search: {e}")
|
| 1142 |
return [{"snippet": "Error occurred during Google search", "link": ""}]
|
| 1143 |
-
|
| 1144 |
def rag_response(query, selected_doc_ids=None):
|
| 1145 |
"""
|
| 1146 |
Handle queries by searching both the main knowledge base and the selected documents.
|
|
|
|
| 1097 |
# Retrieve the top 3 most relevant documents
|
| 1098 |
retrieved_docs = st.session_state["faiss_db"].similarity_search(query, k=3)
|
| 1099 |
return retrieved_docs
|
| 1100 |
+
# Asynchronous function to fetch a URL and check whether its page text contains the query (case-insensitive)
|
| 1101 |
+
async def validate_and_fetch_content(session, url, query):
|
| 1102 |
try:
|
| 1103 |
+
async with session.get(url, allow_redirects=True, timeout=10) as response:
|
| 1104 |
if response.status == 200:
|
| 1105 |
+
content = await response.text()
|
| 1106 |
+
if query.lower() in content.lower():
|
| 1107 |
+
return {"url": url, "valid": True, "contains_query": True}
|
| 1108 |
+
return {"url": url, "valid": True, "contains_query": False}
|
| 1109 |
except Exception:
|
| 1110 |
+
return {"url": url, "valid": False, "contains_query": False}
|
| 1111 |
|
| 1112 |
# Function to validate a batch of URLs asynchronously
|
| 1113 |
+
async def validate_links_with_content(urls, query):
|
| 1114 |
async with aiohttp.ClientSession() as session:
|
| 1115 |
+
tasks = [validate_and_fetch_content(session, url, query) for url in urls]
|
| 1116 |
results = await asyncio.gather(*tasks)
|
| 1117 |
return results
|
| 1118 |
|
| 1119 |
# Function to perform a Google search and validate links
|
| 1120 |
def google_search(query):
|
| 1121 |
try:
|
| 1122 |
+
# Step 1: Perform the search using SERP API
|
| 1123 |
search_client = Client(api_key=serper_api_key)
|
| 1124 |
results = search_client.search({"engine": "google", "q": query})
|
| 1125 |
|
|
|
|
| 1128 |
urls = [result.get("link") for result in organic_results if result.get("link")]
|
| 1129 |
snippets = [result.get("snippet") for result in organic_results if result.get("link")]
|
| 1130 |
|
| 1131 |
+
# Step 2: Validate and verify link content
|
| 1132 |
+
validated_links = asyncio.run(validate_links_with_content(urls, query))
|
| 1133 |
+
|
| 1134 |
+
# Step 3: Combine valid snippets and URLs with verified content
|
| 1135 |
+
valid_results = []
|
| 1136 |
+
for snippet, validation in zip(snippets, validated_links):
|
| 1137 |
+
if validation["valid"] and validation["contains_query"]:
|
| 1138 |
+
valid_results.append({"snippet": snippet, "link": validation["url"]})
|
| 1139 |
+
|
|
|
|
| 1140 |
return valid_results
|
| 1141 |
|
| 1142 |
except Exception as e:
|
| 1143 |
logger.error(f"Error in Google search: {e}")
|
| 1144 |
return [{"snippet": "Error occurred during Google search", "link": ""}]
|
|
|
|
| 1145 |
def rag_response(query, selected_doc_ids=None):
|
| 1146 |
"""
|
| 1147 |
Handle queries by searching both the main knowledge base and the selected documents.
|