Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1100,30 +1100,9 @@ def search_knowledge_base(query):
|
|
| 1100 |
# Asynchronous function to validate and verify link content
|
| 1101 |
|
| 1102 |
|
| 1103 |
-
|
| 1104 |
-
|
| 1105 |
-
|
| 1106 |
-
async def validate_and_fetch_content(session, url, query):
|
| 1107 |
-
try:
|
| 1108 |
-
async with session.get(url, allow_redirects=True, timeout=10) as response:
|
| 1109 |
-
if response.status == 200:
|
| 1110 |
-
content = await response.text()
|
| 1111 |
-
if query.lower() in content.lower():
|
| 1112 |
-
return {"url": url, "valid": True, "contains_query": True}
|
| 1113 |
-
return {"url": url, "valid": True, "contains_query": False}
|
| 1114 |
-
except Exception:
|
| 1115 |
-
return {"url": url, "valid": False, "contains_query": False}
|
| 1116 |
-
|
| 1117 |
-
# Function to validate a batch of URLs asynchronously
|
| 1118 |
-
async def validate_links_with_content(urls, query):
|
| 1119 |
-
async with aiohttp.ClientSession() as session:
|
| 1120 |
-
tasks = [validate_and_fetch_content(session, url, query) for url in urls]
|
| 1121 |
-
results = await asyncio.gather(*tasks)
|
| 1122 |
-
return results
|
| 1123 |
-
|
| 1124 |
# Function to perform an advanced Google search using SERP API
|
| 1125 |
-
def google_search(query, site=None, exclude_terms=None, freshness="
|
| 1126 |
-
|
| 1127 |
# Configure SERP API client
|
| 1128 |
search_client = Client(api_key=serper_api_key)
|
| 1129 |
|
|
@@ -1147,19 +1126,12 @@ def google_search(query, site=None, exclude_terms=None, freshness="last_month",
|
|
| 1147 |
# Perform the search
|
| 1148 |
results = search_client.search(search_params)
|
| 1149 |
|
| 1150 |
-
# Extract
|
| 1151 |
organic_results = results.get("organic_results", [])
|
| 1152 |
-
urls = [result.get("link") for result in organic_results if result.get("link")]
|
| 1153 |
-
snippets = [result.get("snippet") for result in organic_results if result.get("link")]
|
| 1154 |
-
|
| 1155 |
-
# Validate URLs asynchronously
|
| 1156 |
-
validated_links = asyncio.run(validate_links_with_content(urls, query))
|
| 1157 |
-
|
| 1158 |
-
# Combine valid snippets and URLs
|
| 1159 |
valid_results = [
|
| 1160 |
-
{"snippet": snippet, "link":
|
| 1161 |
-
for
|
| 1162 |
-
if
|
| 1163 |
]
|
| 1164 |
|
| 1165 |
return valid_results
|
|
@@ -1167,6 +1139,7 @@ def google_search(query, site=None, exclude_terms=None, freshness="last_month",
|
|
| 1167 |
except Exception as e:
|
| 1168 |
logger.error(f"Error in Google search: {e}")
|
| 1169 |
return [{"snippet": "Error occurred during Google search", "link": ""}]
|
|
|
|
| 1170 |
def rag_response(query, selected_doc_ids=None):
|
| 1171 |
"""
|
| 1172 |
Handle queries by searching both the main knowledge base and the selected documents.
|
|
|
|
| 1100 |
# Asynchronous function to validate and verify link content
|
| 1101 |
|
| 1102 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1103 |
# Function to perform an advanced Google search using SERP API
|
| 1104 |
+
def google_search(query, site=None, exclude_terms=None, freshness="last_week", gl=None, hl=None):
|
| 1105 |
+
try:
|
| 1106 |
# Configure SERP API client
|
| 1107 |
search_client = Client(api_key=serper_api_key)
|
| 1108 |
|
|
|
|
| 1126 |
# Perform the search
|
| 1127 |
results = search_client.search(search_params)
|
| 1128 |
|
| 1129 |
+
# Extract snippets and URLs
|
| 1130 |
organic_results = results.get("organic_results", [])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1131 |
valid_results = [
|
| 1132 |
+
{"snippet": result["snippet"], "link": result["link"]}
|
| 1133 |
+
for result in organic_results
|
| 1134 |
+
if "snippet" in result and "link" in result
|
| 1135 |
]
|
| 1136 |
|
| 1137 |
return valid_results
|
|
|
|
| 1139 |
except Exception as e:
|
| 1140 |
logger.error(f"Error in Google search: {e}")
|
| 1141 |
return [{"snippet": "Error occurred during Google search", "link": ""}]
|
| 1142 |
+
|
| 1143 |
def rag_response(query, selected_doc_ids=None):
|
| 1144 |
"""
|
| 1145 |
Handle queries by searching both the main knowledge base and the selected documents.
|