Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -18,6 +18,8 @@ from urllib.parse import quote, urlparse
|
|
| 18 |
import redis
|
| 19 |
|
| 20 |
import serpapi
|
|
|
|
|
|
|
| 21 |
import requests
|
| 22 |
import streamlit.components.v1 as components
|
| 23 |
import smtplib
|
|
@@ -1094,52 +1096,47 @@ def search_knowledge_base(query):
|
|
| 1094 |
# Retrieve the top 3 most relevant documents
|
| 1095 |
retrieved_docs = st.session_state["faiss_db"].similarity_search(query, k=3)
|
| 1096 |
return retrieved_docs
|
| 1097 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1098 |
def google_search(query):
|
| 1099 |
-
"""
|
| 1100 |
-
Performs a Google search using the SerpApi service and retrieves search result snippets.
|
| 1101 |
-
This function uses the SerpApi client to perform a Google search based on the provided query.
|
| 1102 |
-
It extracts and returns the snippets from the organic search results.
|
| 1103 |
-
Args:
|
| 1104 |
-
query (str): The search query to be used for the Google search.
|
| 1105 |
-
Returns:
|
| 1106 |
-
list: A list of snippets from the organic search results. If an error occurs, returns a list with an error message.
|
| 1107 |
-
Raises:
|
| 1108 |
-
requests.exceptions.HTTPError: If an HTTP error occurs during the search, it is logged and an error message is returned.
|
| 1109 |
-
Exception: For any other general errors, they are logged and an error message is returned.
|
| 1110 |
-
"""
|
| 1111 |
try:
|
| 1112 |
-
|
| 1113 |
-
|
| 1114 |
-
|
| 1115 |
-
|
| 1116 |
-
|
| 1117 |
-
|
| 1118 |
-
|
| 1119 |
-
|
| 1120 |
-
# Send POST request to the API
|
| 1121 |
-
conn.request("POST", "/search", payload, headers)
|
| 1122 |
-
|
| 1123 |
-
# Get response and decode the data
|
| 1124 |
-
res = conn.getresponse()
|
| 1125 |
-
data = res.read()
|
| 1126 |
-
results = json.loads(data.decode("utf-8"))
|
| 1127 |
-
|
| 1128 |
-
# Extract snippets from organic search results
|
| 1129 |
-
snippets = [result["snippet"] for result in results.get("organic", [])]
|
| 1130 |
|
| 1131 |
-
#
|
| 1132 |
-
|
| 1133 |
-
|
| 1134 |
-
#
|
| 1135 |
-
|
| 1136 |
-
|
|
|
|
|
|
|
| 1137 |
except Exception as e:
|
| 1138 |
-
|
| 1139 |
-
print(f"General Error: {e}")
|
| 1140 |
return ["Error occurred during Google search"]
|
| 1141 |
|
| 1142 |
-
|
| 1143 |
def rag_response(query, selected_doc_ids=None):
|
| 1144 |
"""
|
| 1145 |
Handle queries by searching both the main knowledge base and the selected documents.
|
|
@@ -1228,24 +1225,24 @@ def cache_response(query, response, ttl=3600):
|
|
| 1228 |
|
| 1229 |
|
| 1230 |
|
| 1231 |
-
tavily_tool = TavilySearchResults(
|
| 1232 |
-
|
| 1233 |
-
|
| 1234 |
-
|
| 1235 |
-
|
| 1236 |
-
|
| 1237 |
-
|
| 1238 |
-
|
| 1239 |
-
|
| 1240 |
-
|
| 1241 |
-
|
| 1242 |
-
|
| 1243 |
-
)
|
| 1244 |
# Compile all tool functions into a list
|
| 1245 |
tools = [
|
| 1246 |
knowledge_base_tool, # Tool for querying the knowledge base and retrieving responses
|
| 1247 |
-
tavily_tool,
|
| 1248 |
-
|
| 1249 |
]
|
| 1250 |
|
| 1251 |
prompt_message = f"""
|
|
@@ -2170,7 +2167,7 @@ def handle_prompt(prompt):
|
|
| 2170 |
display_typing_indicator()
|
| 2171 |
cleaned_text = ""
|
| 2172 |
base_instructions = """
|
| 2173 |
-
Dont use trust bucket names literally in the content and headings.
|
| 2174 |
|
| 2175 |
1. **Adhere to Uploaded Document's Style**:
|
| 2176 |
- When asked uploaded files or document means knowledgebase.
|
|
@@ -2180,13 +2177,11 @@ def handle_prompt(prompt):
|
|
| 2180 |
2. **Prioritize Knowledge Base and Internet Sources**:
|
| 2181 |
- Use uploaded documents or knowledge base files as the primary source.
|
| 2182 |
- Perform a Google search to retrieve valid and correct internet links for references, ensuring only accurate and verified source links are used.
|
| 2183 |
-
|
| 2184 |
-
3. **Avoid Flowery Language and AI Jargon**:
|
| 2185 |
-
- Use clear, professional language without exaggerated or vague expressions. Avoid jargon like "beacon," "realm," "exemplifies," etc.
|
| 2186 |
-
|
| 2187 |
4. **Ensure Accuracy**:
|
| 2188 |
- Provide only verifiable and accurate information. Do not include placeholders, fabricated URLs, or vague references.
|
| 2189 |
-
- *When finding trustbuilders Be over specific with numbers,names,dollars, programs ,awards and action*
|
|
|
|
| 2190 |
|
| 2191 |
"""
|
| 2192 |
|
|
|
|
| 18 |
import redis
|
| 19 |
|
| 20 |
import serpapi
|
| 21 |
+
from serpapi import Client # Assuming serpapi.Client is the correct import
|
| 22 |
+
|
| 23 |
import requests
|
| 24 |
import streamlit.components.v1 as components
|
| 25 |
import smtplib
|
|
|
|
| 1096 |
# Retrieve the top 3 most relevant documents
|
| 1097 |
retrieved_docs = st.session_state["faiss_db"].similarity_search(query, k=3)
|
| 1098 |
return retrieved_docs
|
| 1099 |
+
|
| 1100 |
+
# Asynchronous function to validate a single URL
|
| 1101 |
+
async def validate_url(session, url):
    """Report whether a URL answers a HEAD request with HTTP status 200.

    Args:
        session: Object whose ``head(url, allow_redirects=..., timeout=...)``
            call returns an async context manager yielding a response with a
            ``status`` attribute (presumably an ``aiohttp.ClientSession`` --
            confirm against the caller).
        url: The address to probe.

    Returns:
        bool: ``True`` only when the response status is exactly 200. ``False``
        for any other status, for a missing/empty URL, or when the request
        raises.
    """
    if not url:
        # Nothing to probe: answer directly instead of relying on the
        # request to fail and be swallowed by the handler below.
        return False
    try:
        async with session.head(url, allow_redirects=True, timeout=5) as response:
            return response.status == 200
    except Exception:
        # Best-effort check: any failure while probing counts as "not valid".
        return False
|
| 1109 |
+
|
| 1110 |
+
# Function to validate a batch of URLs asynchronously
|
| 1111 |
+
async def validate_links_async(urls):
    """Validate a batch of URLs concurrently over one shared client session.

    Args:
        urls: The URLs to check; each one is passed to ``validate_url``.

    Returns:
        list: One ``validate_url`` result per input URL, in input order
        (``asyncio.gather`` preserves the order of its awaitables). An empty
        input yields an empty list.
    """
    if not urls:
        # No work to do: avoid opening a client session for nothing.
        return []
    async with aiohttp.ClientSession() as session:
        checks = [validate_url(session, url) for url in urls]
        return await asyncio.gather(*checks)
|
| 1116 |
+
|
| 1117 |
+
# Function to perform a Google search and validate links
|
| 1118 |
def google_search(query):
    """Run a Google search via the SerpApi client and return validated snippets.

    The organic results are reduced to (link, snippet) pairs, the links are
    checked with ``validate_links_async``, and only the snippets whose link
    passed the check are returned.

    Args:
        query (str): The search query.

    Returns:
        list: Snippets of the organic results whose link validated, in result
        order. If anything raises, the error is logged and a single-element
        list containing an error message is returned instead.
    """
    try:
        search_client = Client(api_key=serper_api_key)
        results = search_client.search({"engine": "google", "q": query})

        # Keep link and snippet paired, and require both: a result with a link
        # but no snippet would otherwise put None into the returned list.
        candidates = [
            (result["link"], result["snippet"])
            for result in results.get("organic_results", [])
            if result.get("link") and result.get("snippet")
        ]
        if not candidates:
            return []

        # Validate URLs asynchronously
        valid_links = asyncio.run(
            validate_links_async([link for link, _ in candidates])
        )

        # Filter valid snippets
        return [
            snippet
            for (_, snippet), is_valid in zip(candidates, valid_links)
            if is_valid
        ]
    except Exception as e:
        logger.error(f"Error in Google search: {e}")
        return ["Error occurred during Google search"]
|
| 1139 |
|
|
|
|
| 1140 |
def rag_response(query, selected_doc_ids=None):
|
| 1141 |
"""
|
| 1142 |
Handle queries by searching both the main knowledge base and the selected documents.
|
|
|
|
| 1225 |
|
| 1226 |
|
| 1227 |
|
| 1228 |
+
# tavily_tool = TavilySearchResults(
|
| 1229 |
+
# max_results=10,
|
| 1230 |
+
# search_depth="advanced",
|
| 1231 |
+
# topic="news",
|
| 1232 |
+
# days=7,
|
| 1233 |
+
# include_answer=True,
|
| 1234 |
+
# include_raw_content=True,
|
| 1235 |
+
# # include_domains=[...],
|
| 1236 |
+
# exclude_domains=['example.com'],
|
| 1237 |
+
# # name="...", # overwrite default tool name
|
| 1238 |
+
# # description="...", # overwrite default tool description
|
| 1239 |
+
# # args_schema=..., # overwrite default args_schema: BaseModel
|
| 1240 |
+
# )
|
| 1241 |
# Compile all tool functions into a list
|
| 1242 |
tools = [
|
| 1243 |
knowledge_base_tool, # Tool for querying the knowledge base and retrieving responses
|
| 1244 |
+
#tavily_tool,
|
| 1245 |
+
google_search_tool, # Tool for performing a Google search and retrieving search result snippets
|
| 1246 |
]
|
| 1247 |
|
| 1248 |
prompt_message = f"""
|
|
|
|
| 2167 |
display_typing_indicator()
|
| 2168 |
cleaned_text = ""
|
| 2169 |
base_instructions = """
|
| 2170 |
+
Dont use trust bucket names literally in the content and headings.Avoid flowery words.
|
| 2171 |
|
| 2172 |
1. **Adhere to Uploaded Document's Style**:
|
| 2173 |
- When asked uploaded files or document means knowledgebase.
|
|
|
|
| 2177 |
2. **Prioritize Knowledge Base and Internet Sources**:
|
| 2178 |
- Use uploaded documents or knowledge base files as the primary source.
|
| 2179 |
- Perform a Google search to retrieve valid and correct internet links for references, ensuring only accurate and verified source links are used.
|
| 2180 |
+
|
|
|
|
|
|
|
|
|
|
| 2181 |
4. **Ensure Accuracy**:
|
| 2182 |
- Provide only verifiable and accurate information. Do not include placeholders, fabricated URLs, or vague references.
|
| 2183 |
+
- *When finding trustbuilders *Be over specific with numbers,names,dollars, programs ,awards and action**.
|
| 2184 |
+
- Give output in proper formatting.
|
| 2185 |
|
| 2186 |
"""
|
| 2187 |
|