Wajahat698 commited on
Commit
0068dff
·
verified ·
1 Parent(s): 44ba824

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -61
app.py CHANGED
@@ -18,6 +18,8 @@ from urllib.parse import quote, urlparse
18
  import redis
19
 
20
  import serpapi
 
 
21
  import requests
22
  import streamlit.components.v1 as components
23
  import smtplib
@@ -1094,52 +1096,47 @@ def search_knowledge_base(query):
1094
  # Retrieve the top 5 most relevant documents
1095
  retrieved_docs = st.session_state["faiss_db"].similarity_search(query, k=3)
1096
  return retrieved_docs
1097
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1098
  def google_search(query):
1099
- """
1100
- Performs a Google search using the SerpApi service and retrieves search result snippets.
1101
- This function uses the SerpApi client to perform a Google search based on the provided query.
1102
- It extracts and returns the snippets from the organic search results.
1103
- Args:
1104
- query (str): The search query to be used for the Google search.
1105
- Returns:
1106
- list: A list of snippets from the organic search results. If an error occurs, returns a list with an error message.
1107
- Raises:
1108
- requests.exceptions.HTTPError: If an HTTP error occurs during the search, it is logged and an error message is returned.
1109
- Exception: For any other general errors, they are logged and an error message is returned.
1110
- """
1111
  try:
1112
- # Set up connection to google.serper.dev API
1113
- conn = http.client.HTTPSConnection("google.serper.dev")
1114
- payload = json.dumps({"q": query})
1115
- headers = {
1116
- "X-API-KEY": "07b4113c2730711b568623b13f7c88078bab9c78",
1117
- "Content-Type": "application/json",
1118
- }
1119
-
1120
- # Send POST request to the API
1121
- conn.request("POST", "/search", payload, headers)
1122
-
1123
- # Get response and decode the data
1124
- res = conn.getresponse()
1125
- data = res.read()
1126
- results = json.loads(data.decode("utf-8"))
1127
-
1128
- # Extract snippets from organic search results
1129
- snippets = [result["snippet"] for result in results.get("organic", [])]
1130
 
1131
- # Return the list of snippets
1132
- return snippets
1133
- except http.client.HTTPException as http_err:
1134
- # Log HTTP errors and return a specific error message
1135
- print(f"HTTP error occurred: {http_err}")
1136
- return ["HTTP error occurred during Google search"]
 
 
1137
  except Exception as e:
1138
- # Log any other general errors and return a generic error message
1139
- print(f"General Error: {e}")
1140
  return ["Error occurred during Google search"]
1141
 
1142
-
1143
  def rag_response(query, selected_doc_ids=None):
1144
  """
1145
  Handle queries by searching both the main knowledge base and the selected documents.
@@ -1228,24 +1225,24 @@ def cache_response(query, response, ttl=3600):
1228
 
1229
 
1230
 
1231
- tavily_tool = TavilySearchResults(
1232
- max_results=10,
1233
- search_depth="advanced",
1234
- topic="news",
1235
- days=7,
1236
- include_answer=True,
1237
- include_raw_content=True,
1238
- # include_domains=[...],
1239
- exclude_domains=['example.com'],
1240
- # name="...", # overwrite default tool name
1241
- # description="...", # overwrite default tool description
1242
- # args_schema=..., # overwrite default args_schema: BaseModel
1243
- )
1244
  # Compile all tool functions into a list
1245
  tools = [
1246
  knowledge_base_tool, # Tool for querying the knowledge base and retrieving responses
1247
- tavily_tool,
1248
- # google_search_tool, # Tool for performing a Google search and retrieving search result snippets
1249
  ]
1250
 
1251
  prompt_message = f"""
@@ -2170,7 +2167,7 @@ def handle_prompt(prompt):
2170
  display_typing_indicator()
2171
  cleaned_text = ""
2172
  base_instructions = """
2173
- Dont use trust bucket names literally in the content and headings.Dont use flowery words.
2174
 
2175
  1. **Adhere to Uploaded Document's Style**:
2176
  - When asked uploaded files or document means knowledgebase.
@@ -2180,13 +2177,11 @@ def handle_prompt(prompt):
2180
  2. **Prioritize Knowledge Base and Internet Sources**:
2181
  - Use uploaded documents or knowledge base files as the primary source.
2182
  - Perform a Google search to retrieve valid and correct internet links for references, ensuring only accurate and verified source links are used.
2183
-
2184
- 3. **Avoid Flowery Language and AI Jargon**:
2185
- - Use clear, professional language without exaggerated or vague expressions. Avoid jargon like "beacon," "realm," "exemplifies," etc.
2186
-
2187
  4. **Ensure Accuracy**:
2188
  - Provide only verifiable and accurate information. Do not include placeholders, fabricated URLs, or vague references.
2189
- - *When finding trustbuilders Be over specific with numbers,names,dollars, programs ,awards and action*
 
2190
 
2191
  """
2192
 
 
18
  import redis
19
 
20
  import serpapi
21
+ from serpapi import Client # Assuming serpapi.Client is the correct import
22
+
23
  import requests
24
  import streamlit.components.v1 as components
25
  import smtplib
 
1096
  # Retrieve the top 5 most relevant documents
1097
  retrieved_docs = st.session_state["faiss_db"].similarity_search(query, k=3)
1098
  return retrieved_docs
1099
+
1100
# Asynchronous helper: probe a single URL for liveness.
async def validate_url(session, url):
    """Return True iff *url* answers an HTTP HEAD request with a final 200.

    Args:
        session: an aiohttp.ClientSession (or compatible) to issue the request.
        url (str): the URL to probe.

    Returns:
        bool: True only when the redirect-resolved response status is 200;
        False on any non-200 status, timeout, or network/URL error.
    """
    try:
        async with session.head(url, allow_redirects=True, timeout=5) as resp:
            # A followed redirect chain ending in 200 counts as a live link.
            return resp.status == 200
    except Exception:
        # Timeouts, DNS failures, malformed URLs, etc. — treat as dead.
        return False
1109
+
1110
# Validate a batch of URLs concurrently.
async def validate_links_async(urls):
    """Check every URL in *urls* concurrently.

    Returns:
        list[bool]: liveness flags aligned one-to-one with *urls*.
    """
    async with aiohttp.ClientSession() as http:
        pending = [validate_url(http, link) for link in urls]
        # gather preserves input order, keeping flags aligned with urls.
        return await asyncio.gather(*pending)
1116
+
1117
# Perform a Google search (via SerpApi) and keep only snippets with live links.
def google_search(query):
    """Search Google through SerpApi and return snippets of reachable results.

    Args:
        query (str): The search query.

    Returns:
        list[str]: Snippets of organic results whose URLs passed an
        asynchronous HEAD-request liveness check. On any failure a
        one-element list containing an error message is returned.
    """
    try:
        search_client = Client(api_key=serper_api_key)
        results = search_client.search({"engine": "google", "q": query})

        # Build (url, snippet) pairs in a single pass so the two sequences
        # stay aligned. Results without a link are skipped; a missing
        # snippet defaults to "" instead of leaking None to callers
        # (the previous two-list construction could return None snippets).
        organic = results.get("organic_results", [])
        pairs = [
            (entry["link"], entry.get("snippet", ""))
            for entry in organic
            if entry.get("link")
        ]
        urls = [link for link, _ in pairs]
        snippets = [text for _, text in pairs]

        # Validate all URLs concurrently, then keep snippets of live links only.
        valid_flags = asyncio.run(validate_links_async(urls))
        return [text for text, ok in zip(snippets, valid_flags) if ok]

    except Exception as e:
        # Broad catch keeps the app responsive on any failure (API errors,
        # asyncio.run inside an already-running loop, etc.).
        logger.error(f"Error in Google search: {e}")
        return ["Error occurred during Google search"]
1139
 
 
1140
  def rag_response(query, selected_doc_ids=None):
1141
  """
1142
  Handle queries by searching both the main knowledge base and the selected documents.
 
1225
 
1226
 
1227
 
1228
# Tavily news-search tool — currently disabled in favour of google_search_tool.
# tavily_tool = TavilySearchResults(
#     max_results=10,
#     search_depth="advanced",
#     topic="news",
#     days=7,
#     include_answer=True,
#     include_raw_content=True,
#     # include_domains=[...],
#     exclude_domains=['example.com'],
#     # name="...",          # overwrite default tool name
#     # description="...",   # overwrite default tool description
#     # args_schema=...,     # overwrite default args_schema: BaseModel
# )

# Compile all tool functions into a list
tools = [
    knowledge_base_tool,   # Tool for querying the knowledge base and retrieving responses
    # tavily_tool,
    google_search_tool,    # Tool for performing a Google search and retrieving search result snippets
]
1247
 
1248
  prompt_message = f"""
 
2167
  display_typing_indicator()
2168
  cleaned_text = ""
2169
  base_instructions = """
2170
+ Dont use trust bucket names literally in the content and headings.Avoid flowery words.
2171
 
2172
  1. **Adhere to Uploaded Document's Style**:
2173
  - When asked uploaded files or document means knowledgebase.
 
2177
  2. **Prioritize Knowledge Base and Internet Sources**:
2178
  - Use uploaded documents or knowledge base files as the primary source.
2179
  - Perform a Google search to retrieve valid and correct internet links for references, ensuring only accurate and verified source links are used.
2180
+
 
 
 
2181
  4. **Ensure Accuracy**:
2182
  - Provide only verifiable and accurate information. Do not include placeholders, fabricated URLs, or vague references.
2183
+ - *When finding trustbuilders *Be over specific with numbers,names,dollars, programs ,awards and action**.
2184
+ - Give output in proper formatting.
2185
 
2186
  """
2187