Spaces:

ash2203
/

AISearchEngine

Running

App Files Community

ash2203 committed on Oct 25, 2024

Commit

cbdf8d8

verified ·

1 Parent(s): b6441be

Update brave.py

Browse files

Files changed (1) hide show

brave.py +41 -21

brave.py CHANGED Viewed

@@ -2,7 +2,7 @@ import os
 from dotenv import load_dotenv
 import requests
 from langchain_community.document_loaders import WebBaseLoader
-from groq import Groq
 from bs4 import BeautifulSoup
 import re
 import time
@@ -15,9 +15,9 @@ load_dotenv()
 # Initialize API clients
 BRAVE_API_KEY = os.getenv("BRAVE_API_KEY")
 BRAVE_SEARCH_URL = "https://api.search.brave.com/res/v1/news/search"
-groq_api_key = os.getenv("GROQ_API_KEY")
-groq_client = Groq(api_key=groq_api_key)
 @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
 def clean_content(content):
@@ -25,11 +25,22 @@ def clean_content(content):
     soup = BeautifulSoup(content, 'html.parser')
     # Remove unwanted elements
-    for element in soup(['header', 'footer', 'nav', 'aside']):
         element.decompose()
-    # Get text content
-    text = soup.get_text()
     # Remove extra spaces and newlines
     text = re.sub(r'\s+', ' ', text).strip()
@@ -39,7 +50,7 @@ def clean_content(content):
     return text
-@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
 def summarize_content(content, max_tokens=4000):
     summarization_prompt = f"""Summarize the following content, preserving important details, facts, and figures. This summary will be used for research and news purposes, so accuracy and comprehensiveness are crucial. Keep the summary within approximately {max_tokens} tokens.
@@ -49,21 +60,21 @@ def summarize_content(content, max_tokens=4000):
     Summary:"""
     try:
-        chat_completion = groq_client.chat.completions.create(
             messages=[
                 {"role": "system", "content": "You are an expert summarizer, capable of condensing information while retaining crucial details."},
                 {"role": "user", "content": summarization_prompt}
             ],
-            model="llama-3.1-70b-versatile",
-            max_tokens=max_tokens,
         )
-        summary = chat_completion.choices[0].message.content
         if not summary.strip():
-            raise ValueError("Empty summary received from LLM")
         return summary
     except Exception as e:
-        raise ValueError(f"Error in LLM call: {str(e)}")
 def perform_web_search(query, num_results=2):
     headers = {
@@ -122,6 +133,7 @@ def perform_web_search(query, num_results=2):
 def load_web_content(urls):
     loader = WebBaseLoader(urls)
     documents = loader.load()
     cleaned_contents = []
     summarized_contents = []
@@ -132,7 +144,13 @@ def load_web_content(urls):
             print(f"Cleaned content for URL {i+1}:")
             print(cleaned_content[:500] + "..." if len(cleaned_content) > 500 else cleaned_content)
             print("\n" + "-"*50 + "\n")
             summarized_content = summarize_content(cleaned_content)
             summarized_contents.append(summarized_content)
             print(f"Summarized content for URL {i+1}:")
@@ -140,6 +158,9 @@ def load_web_content(urls):
             print("\n" + "-"*50 + "\n")
         except Exception as e:
             print(f"Error processing content for URL {i+1}: {str(e)}")
     if not summarized_contents:
         print("Error: No content could be processed")
@@ -160,19 +181,19 @@ def generate_detailed_explanation(query, context):
     Explanation:"""
     try:
-        chat_completion = groq_client.chat.completions.create(
             messages=[
                 {"role": "system", "content": "You are a knowledgeable assistant that provides good and easy to understand explanations on various topics, incorporating all relevant information from the given context."},
                 {"role": "user", "content": prompt}
             ],
-            model="llama-3.1-8b-instant",
-            max_tokens=7000,  # Reduced to stay within the 8000 token limit
         )
-        explanation = chat_completion.choices[0].message.content
         if not explanation.strip():
-            print("Error: Empty explanation received from LLM")
-            raise ValueError("Empty explanation received from LLM")
         return explanation
     except Exception as e:
         print(f"Error in generate_detailed_explanation: {str(e)}")
@@ -193,4 +214,3 @@ def main():
 if __name__ == "__main__":
     main()

 from dotenv import load_dotenv
 import requests
 from langchain_community.document_loaders import WebBaseLoader
+from openai import OpenAI
 from bs4 import BeautifulSoup
 import re
 import time
 # Initialize API clients
 BRAVE_API_KEY = os.getenv("BRAVE_API_KEY")
 BRAVE_SEARCH_URL = "https://api.search.brave.com/res/v1/news/search"
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+openai_client = OpenAI(api_key=OPENAI_API_KEY)
 @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
 def clean_content(content):
     soup = BeautifulSoup(content, 'html.parser')
     # Remove unwanted elements
+    for element in soup(['header', 'footer', 'nav', 'aside', 'menu']):
         element.decompose()
+    # Try to find the main content
+    main_content = soup.find('main') or soup.find('article') or soup.find('div', class_='content')
+    if main_content:
+        # If a main content area is found, use that
+        text = main_content.get_text()
+    else:
+        # If no main content area is found, use the body
+        body = soup.find('body')
+        if body:
+            text = body.get_text()
+        else:
+            text = soup.get_text()
     # Remove extra spaces and newlines
     text = re.sub(r'\s+', ' ', text).strip()
     return text
 def summarize_content(content, max_tokens=4000):
     summarization_prompt = f"""Summarize the following content, preserving important details, facts, and figures. This summary will be used for research and news purposes, so accuracy and comprehensiveness are crucial. Keep the summary within approximately {max_tokens} tokens.
     Summary:"""
     try:
+        response = openai_client.chat.completions.create(
+            model="gpt-4o-mini",
             messages=[
                 {"role": "system", "content": "You are an expert summarizer, capable of condensing information while retaining crucial details."},
                 {"role": "user", "content": summarization_prompt}
             ],
+            max_tokens=max_tokens
         )
+        summary = response.choices[0].message.content
         if not summary.strip():
+            raise ValueError("Empty summary received from OpenAI")
         return summary
     except Exception as e:
+        raise ValueError(f"Error in OpenAI API call: {str(e)}")
 def perform_web_search(query, num_results=2):
     headers = {
 def load_web_content(urls):
     loader = WebBaseLoader(urls)
     documents = loader.load()
+    print('Documents: ', documents)
     cleaned_contents = []
     summarized_contents = []
             print(f"Cleaned content for URL {i+1}:")
             print(cleaned_content[:500] + "..." if len(cleaned_content) > 500 else cleaned_content)
             print("\n" + "-"*50 + "\n")
+            print('Cleaned content: ', cleaned_content)
+            print('-'*50)
+            print(len(cleaned_content))
+            cleaned_content = cleaned_content.replace('\n', ' ')
+            cleaned_content = cleaned_content.replace('\t', ' ')
+            cleaned_content = cleaned_content[:1000]
             summarized_content = summarize_content(cleaned_content)
             summarized_contents.append(summarized_content)
             print(f"Summarized content for URL {i+1}:")
             print("\n" + "-"*50 + "\n")
         except Exception as e:
             print(f"Error processing content for URL {i+1}: {str(e)}")
+            print(f"Full error details: {repr(e)}")
+            print(f"URL: {urls[i]}")
+            print("Skipping this URL and continuing with the next one.")
     if not summarized_contents:
         print("Error: No content could be processed")
     Explanation:"""
     try:
+        response = openai_client.chat.completions.create(
+            model="gpt-4o-mini",
             messages=[
                 {"role": "system", "content": "You are a knowledgeable assistant that provides good and easy to understand explanations on various topics, incorporating all relevant information from the given context."},
                 {"role": "user", "content": prompt}
             ],
+            max_tokens=4096  # Adjust as needed
         )
+        explanation = response.choices[0].message.content
         if not explanation.strip():
+            print("Error: Empty explanation received from OpenAI")
+            raise ValueError("Empty explanation received from OpenAI")
         return explanation
     except Exception as e:
         print(f"Error in generate_detailed_explanation: {str(e)}")
 if __name__ == "__main__":
     main()