Wajahat698 commited on
Commit
0068dff
·
verified ·
1 Parent(s): 44ba824

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -61
app.py CHANGED
@@ -18,6 +18,8 @@ from urllib.parse import quote, urlparse
18
  import redis
19
 
20
  import serpapi
 
 
21
  import requests
22
  import streamlit.components.v1 as components
23
  import smtplib
@@ -1094,52 +1096,47 @@ def search_knowledge_base(query):
1094
  # Retrieve the top 5 most relevant documents
1095
  retrieved_docs = st.session_state["faiss_db"].similarity_search(query, k=3)
1096
  return retrieved_docs
1097
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1098
  def google_search(query):
1099
- """
1100
- Performs a Google search using the SerpApi service and retrieves search result snippets.
1101
- This function uses the SerpApi client to perform a Google search based on the provided query.
1102
- It extracts and returns the snippets from the organic search results.
1103
- Args:
1104
- query (str): The search query to be used for the Google search.
1105
- Returns:
1106
- list: A list of snippets from the organic search results. If an error occurs, returns a list with an error message.
1107
- Raises:
1108
- requests.exceptions.HTTPError: If an HTTP error occurs during the search, it is logged and an error message is returned.
1109
- Exception: For any other general errors, they are logged and an error message is returned.
1110
- """
1111
  try:
1112
- # Set up connection to google.serper.dev API
1113
- conn = http.client.HTTPSConnection("google.serper.dev")
1114
- payload = json.dumps({"q": query})
1115
- headers = {
1116
- "X-API-KEY": "07b4113c2730711b568623b13f7c88078bab9c78",
1117
- "Content-Type": "application/json",
1118
- }
1119
-
1120
- # Send POST request to the API
1121
- conn.request("POST", "/search", payload, headers)
1122
-
1123
- # Get response and decode the data
1124
- res = conn.getresponse()
1125
- data = res.read()
1126
- results = json.loads(data.decode("utf-8"))
1127
-
1128
- # Extract snippets from organic search results
1129
- snippets = [result["snippet"] for result in results.get("organic", [])]
1130
 
1131
- # Return the list of snippets
1132
- return snippets
1133
- except http.client.HTTPException as http_err:
1134
- # Log HTTP errors and return a specific error message
1135
- print(f"HTTP error occurred: {http_err}")
1136
- return ["HTTP error occurred during Google search"]
 
 
1137
  except Exception as e:
1138
- # Log any other general errors and return a generic error message
1139
- print(f"General Error: {e}")
1140
  return ["Error occurred during Google search"]
1141
 
1142
-
1143
  def rag_response(query, selected_doc_ids=None):
1144
  """
1145
  Handle queries by searching both the main knowledge base and the selected documents.
@@ -1228,24 +1225,24 @@ def cache_response(query, response, ttl=3600):
1228
 
1229
 
1230
 
1231
- tavily_tool = TavilySearchResults(
1232
- max_results=10,
1233
- search_depth="advanced",
1234
- topic="news",
1235
- days=7,
1236
- include_answer=True,
1237
- include_raw_content=True,
1238
- # include_domains=[...],
1239
- exclude_domains=['example.com'],
1240
- # name="...", # overwrite default tool name
1241
- # description="...", # overwrite default tool description
1242
- # args_schema=..., # overwrite default args_schema: BaseModel
1243
- )
1244
  # Compile all tool functions into a list
1245
  tools = [
1246
  knowledge_base_tool, # Tool for querying the knowledge base and retrieving responses
1247
- tavily_tool,
1248
- # google_search_tool, # Tool for performing a Google search and retrieving search result snippets
1249
  ]
1250
 
1251
  prompt_message = f"""
@@ -2170,7 +2167,7 @@ def handle_prompt(prompt):
2170
  display_typing_indicator()
2171
  cleaned_text = ""
2172
  base_instructions = """
2173
- Dont use trust bucket names literally in the content and headings.Dont use flowery words.
2174
 
2175
  1. **Adhere to Uploaded Document's Style**:
2176
  - When asked uploaded files or document means knowledgebase.
@@ -2180,13 +2177,11 @@ def handle_prompt(prompt):
2180
  2. **Prioritize Knowledge Base and Internet Sources**:
2181
  - Use uploaded documents or knowledge base files as the primary source.
2182
  - Perform a Google search to retrieve valid and correct internet links for references, ensuring only accurate and verified source links are used.
2183
-
2184
- 3. **Avoid Flowery Language and AI Jargon**:
2185
- - Use clear, professional language without exaggerated or vague expressions. Avoid jargon like "beacon," "realm," "exemplifies," etc.
2186
-
2187
  4. **Ensure Accuracy**:
2188
  - Provide only verifiable and accurate information. Do not include placeholders, fabricated URLs, or vague references.
2189
- - *When finding trustbuilders Be over specific with numbers,names,dollars, programs ,awards and action*
 
2190
 
2191
  """
2192
 
 
18
  import redis
19
 
20
  import serpapi
21
+ from serpapi import Client # Assuming serpapi.Client is the correct import
22
+
23
  import requests
24
  import streamlit.components.v1 as components
25
  import smtplib
 
1096
  # Retrieve the top 5 most relevant documents
1097
  retrieved_docs = st.session_state["faiss_db"].similarity_search(query, k=3)
1098
  return retrieved_docs
1099
+
1100
# Asynchronous helper: probe a single URL for liveness.
async def validate_url(session, url):
    """Return True iff *url* answers an HTTP HEAD request with a final 200.

    Args:
        session: an aiohttp.ClientSession (or compatible) to issue the request.
        url (str): the URL to probe.

    Returns:
        bool: True only when the redirect-resolved response status is 200;
        False on any non-200 status, timeout, or network/URL error.
    """
    try:
        async with session.head(url, allow_redirects=True, timeout=5) as resp:
            # A followed redirect chain ending in 200 counts as a live link.
            return resp.status == 200
    except Exception:
        # Timeouts, DNS failures, malformed URLs, etc. — treat as dead.
        return False
1109
+
1110
# Validate a batch of URLs concurrently.
async def validate_links_async(urls):
    """Check every URL in *urls* concurrently.

    Returns:
        list[bool]: liveness flags aligned one-to-one with *urls*.
    """
    async with aiohttp.ClientSession() as http:
        pending = [validate_url(http, link) for link in urls]
        # gather preserves input order, keeping flags aligned with urls.
        return await asyncio.gather(*pending)
1116
+
1117
# Perform a Google search (via SerpApi) and keep only snippets with live links.
def google_search(query):
    """Search Google through SerpApi and return snippets of reachable results.

    Args:
        query (str): The search query.

    Returns:
        list[str]: Snippets of organic results whose URLs passed an
        asynchronous HEAD-request liveness check. On any failure a
        one-element list containing an error message is returned.
    """
    try:
        search_client = Client(api_key=serper_api_key)
        results = search_client.search({"engine": "google", "q": query})

        # Build (url, snippet) pairs in a single pass so the two sequences
        # stay aligned. Results without a link are skipped; a missing
        # snippet defaults to "" instead of leaking None to callers
        # (the previous two-list construction could return None snippets).
        organic = results.get("organic_results", [])
        pairs = [
            (entry["link"], entry.get("snippet", ""))
            for entry in organic
            if entry.get("link")
        ]
        urls = [link for link, _ in pairs]
        snippets = [text for _, text in pairs]

        # Validate all URLs concurrently, then keep snippets of live links only.
        valid_flags = asyncio.run(validate_links_async(urls))
        return [text for text, ok in zip(snippets, valid_flags) if ok]

    except Exception as e:
        # Broad catch keeps the app responsive on any failure (API errors,
        # asyncio.run inside an already-running loop, etc.).
        logger.error(f"Error in Google search: {e}")
        return ["Error occurred during Google search"]
1139
 
 
1140
  def rag_response(query, selected_doc_ids=None):
1141
  """
1142
  Handle queries by searching both the main knowledge base and the selected documents.
 
1225
 
1226
 
1227
 
1228
# Tavily news-search tool — currently disabled in favour of google_search_tool.
# tavily_tool = TavilySearchResults(
#     max_results=10,
#     search_depth="advanced",
#     topic="news",
#     days=7,
#     include_answer=True,
#     include_raw_content=True,
#     # include_domains=[...],
#     exclude_domains=['example.com'],
#     # name="...",          # overwrite default tool name
#     # description="...",   # overwrite default tool description
#     # args_schema=...,     # overwrite default args_schema: BaseModel
# )

# Compile all tool functions into a list
tools = [
    knowledge_base_tool,   # Tool for querying the knowledge base and retrieving responses
    # tavily_tool,
    google_search_tool,    # Tool for performing a Google search and retrieving search result snippets
]
1247
 
1248
  prompt_message = f"""
 
2167
  display_typing_indicator()
2168
  cleaned_text = ""
2169
  base_instructions = """
2170
+ Dont use trust bucket names literally in the content and headings.Avoid flowery words.
2171
 
2172
  1. **Adhere to Uploaded Document's Style**:
2173
  - When asked uploaded files or document means knowledgebase.
 
2177
  2. **Prioritize Knowledge Base and Internet Sources**:
2178
  - Use uploaded documents or knowledge base files as the primary source.
2179
  - Perform a Google search to retrieve valid and correct internet links for references, ensuring only accurate and verified source links are used.
2180
+
 
 
 
2181
  4. **Ensure Accuracy**:
2182
  - Provide only verifiable and accurate information. Do not include placeholders, fabricated URLs, or vague references.
2183
+ - *When finding trustbuilders *Be over specific with numbers,names,dollars, programs ,awards and action**.
2184
+ - Give output in proper formatting.
2185
 
2186
  """
2187