ash2203 committed on
Commit
cbdf8d8
·
verified ·
1 Parent(s): b6441be

Update brave.py

Browse files
Files changed (1) hide show
  1. brave.py +41 -21
brave.py CHANGED
@@ -2,7 +2,7 @@ import os
2
  from dotenv import load_dotenv
3
  import requests
4
  from langchain_community.document_loaders import WebBaseLoader
5
- from groq import Groq
6
  from bs4 import BeautifulSoup
7
  import re
8
  import time
@@ -15,9 +15,9 @@ load_dotenv()
15
  # Initialize API clients
16
  BRAVE_API_KEY = os.getenv("BRAVE_API_KEY")
17
  BRAVE_SEARCH_URL = "https://api.search.brave.com/res/v1/news/search"
18
- groq_api_key = os.getenv("GROQ_API_KEY")
19
 
20
- groq_client = Groq(api_key=groq_api_key)
21
 
22
  @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
23
  def clean_content(content):
@@ -25,11 +25,22 @@ def clean_content(content):
25
  soup = BeautifulSoup(content, 'html.parser')
26
 
27
  # Remove unwanted elements
28
- for element in soup(['header', 'footer', 'nav', 'aside']):
29
  element.decompose()
30
 
31
- # Get text content
32
- text = soup.get_text()
 
 
 
 
 
 
 
 
 
 
 
33
 
34
  # Remove extra spaces and newlines
35
  text = re.sub(r'\s+', ' ', text).strip()
@@ -39,7 +50,7 @@ def clean_content(content):
39
 
40
  return text
41
 
42
- @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
43
  def summarize_content(content, max_tokens=4000):
44
  summarization_prompt = f"""Summarize the following content, preserving important details, facts, and figures. This summary will be used for research and news purposes, so accuracy and comprehensiveness are crucial. Keep the summary within approximately {max_tokens} tokens.
45
 
@@ -49,21 +60,21 @@ def summarize_content(content, max_tokens=4000):
49
  Summary:"""
50
 
51
  try:
52
- chat_completion = groq_client.chat.completions.create(
 
53
  messages=[
54
  {"role": "system", "content": "You are an expert summarizer, capable of condensing information while retaining crucial details."},
55
  {"role": "user", "content": summarization_prompt}
56
  ],
57
- model="llama-3.1-70b-versatile",
58
- max_tokens=max_tokens,
59
  )
60
 
61
- summary = chat_completion.choices[0].message.content
62
  if not summary.strip():
63
- raise ValueError("Empty summary received from LLM")
64
  return summary
65
  except Exception as e:
66
- raise ValueError(f"Error in LLM call: {str(e)}")
67
 
68
  def perform_web_search(query, num_results=2):
69
  headers = {
@@ -122,6 +133,7 @@ def perform_web_search(query, num_results=2):
122
  def load_web_content(urls):
123
  loader = WebBaseLoader(urls)
124
  documents = loader.load()
 
125
  cleaned_contents = []
126
  summarized_contents = []
127
 
@@ -132,7 +144,13 @@ def load_web_content(urls):
132
  print(f"Cleaned content for URL {i+1}:")
133
  print(cleaned_content[:500] + "..." if len(cleaned_content) > 500 else cleaned_content)
134
  print("\n" + "-"*50 + "\n")
135
-
 
 
 
 
 
 
136
  summarized_content = summarize_content(cleaned_content)
137
  summarized_contents.append(summarized_content)
138
  print(f"Summarized content for URL {i+1}:")
@@ -140,6 +158,9 @@ def load_web_content(urls):
140
  print("\n" + "-"*50 + "\n")
141
  except Exception as e:
142
  print(f"Error processing content for URL {i+1}: {str(e)}")
 
 
 
143
 
144
  if not summarized_contents:
145
  print("Error: No content could be processed")
@@ -160,19 +181,19 @@ def generate_detailed_explanation(query, context):
160
  Explanation:"""
161
 
162
  try:
163
- chat_completion = groq_client.chat.completions.create(
 
164
  messages=[
165
  {"role": "system", "content": "You are a knowledgeable assistant that provides good and easy to understand explanations on various topics, incorporating all relevant information from the given context."},
166
  {"role": "user", "content": prompt}
167
  ],
168
- model="llama-3.1-8b-instant",
169
- max_tokens=7000, # Reduced to stay within the 8000 token limit
170
  )
171
 
172
- explanation = chat_completion.choices[0].message.content
173
  if not explanation.strip():
174
- print("Error: Empty explanation received from LLM")
175
- raise ValueError("Empty explanation received from LLM")
176
  return explanation
177
  except Exception as e:
178
  print(f"Error in generate_detailed_explanation: {str(e)}")
@@ -193,4 +214,3 @@ def main():
193
 
194
  if __name__ == "__main__":
195
  main()
196
-
 
2
  from dotenv import load_dotenv
3
  import requests
4
  from langchain_community.document_loaders import WebBaseLoader
5
+ from openai import OpenAI
6
  from bs4 import BeautifulSoup
7
  import re
8
  import time
 
15
  # Initialize API clients
16
  BRAVE_API_KEY = os.getenv("BRAVE_API_KEY")
17
  BRAVE_SEARCH_URL = "https://api.search.brave.com/res/v1/news/search"
18
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
19
 
20
+ openai_client = OpenAI(api_key=OPENAI_API_KEY)
21
 
22
  @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
23
  def clean_content(content):
 
25
  soup = BeautifulSoup(content, 'html.parser')
26
 
27
  # Remove unwanted elements
28
+ for element in soup(['header', 'footer', 'nav', 'aside', 'menu']):
29
  element.decompose()
30
 
31
+ # Try to find the main content
32
+ main_content = soup.find('main') or soup.find('article') or soup.find('div', class_='content')
33
+
34
+ if main_content:
35
+ # If a main content area is found, use that
36
+ text = main_content.get_text()
37
+ else:
38
+ # If no main content area is found, use the body
39
+ body = soup.find('body')
40
+ if body:
41
+ text = body.get_text()
42
+ else:
43
+ text = soup.get_text()
44
 
45
  # Remove extra spaces and newlines
46
  text = re.sub(r'\s+', ' ', text).strip()
 
50
 
51
  return text
52
 
53
+
54
  def summarize_content(content, max_tokens=4000):
55
  summarization_prompt = f"""Summarize the following content, preserving important details, facts, and figures. This summary will be used for research and news purposes, so accuracy and comprehensiveness are crucial. Keep the summary within approximately {max_tokens} tokens.
56
 
 
60
  Summary:"""
61
 
62
  try:
63
+ response = openai_client.chat.completions.create(
64
+ model="gpt-4o-mini",
65
  messages=[
66
  {"role": "system", "content": "You are an expert summarizer, capable of condensing information while retaining crucial details."},
67
  {"role": "user", "content": summarization_prompt}
68
  ],
69
+ max_tokens=max_tokens
 
70
  )
71
 
72
+ summary = response.choices[0].message.content
73
  if not summary.strip():
74
+ raise ValueError("Empty summary received from OpenAI")
75
  return summary
76
  except Exception as e:
77
+ raise ValueError(f"Error in OpenAI API call: {str(e)}")
78
 
79
  def perform_web_search(query, num_results=2):
80
  headers = {
 
133
  def load_web_content(urls):
134
  loader = WebBaseLoader(urls)
135
  documents = loader.load()
136
+ print('Documents: ', documents)
137
  cleaned_contents = []
138
  summarized_contents = []
139
 
 
144
  print(f"Cleaned content for URL {i+1}:")
145
  print(cleaned_content[:500] + "..." if len(cleaned_content) > 500 else cleaned_content)
146
  print("\n" + "-"*50 + "\n")
147
+
148
+ print('Cleaned content: ', cleaned_content)
149
+ print('-'*50)
150
+ print(len(cleaned_content))
151
+ cleaned_content = cleaned_content.replace('\n', ' ')
152
+ cleaned_content = cleaned_content.replace('\t', ' ')
153
+ cleaned_content = cleaned_content[:1000]
154
  summarized_content = summarize_content(cleaned_content)
155
  summarized_contents.append(summarized_content)
156
  print(f"Summarized content for URL {i+1}:")
 
158
  print("\n" + "-"*50 + "\n")
159
  except Exception as e:
160
  print(f"Error processing content for URL {i+1}: {str(e)}")
161
+ print(f"Full error details: {repr(e)}")
162
+ print(f"URL: {urls[i]}")
163
+ print("Skipping this URL and continuing with the next one.")
164
 
165
  if not summarized_contents:
166
  print("Error: No content could be processed")
 
181
  Explanation:"""
182
 
183
  try:
184
+ response = openai_client.chat.completions.create(
185
+ model="gpt-4o-mini",
186
  messages=[
187
  {"role": "system", "content": "You are a knowledgeable assistant that provides good and easy to understand explanations on various topics, incorporating all relevant information from the given context."},
188
  {"role": "user", "content": prompt}
189
  ],
190
+ max_tokens=4096 # Adjust as needed
 
191
  )
192
 
193
+ explanation = response.choices[0].message.content
194
  if not explanation.strip():
195
+ print("Error: Empty explanation received from OpenAI")
196
+ raise ValueError("Empty explanation received from OpenAI")
197
  return explanation
198
  except Exception as e:
199
  print(f"Error in generate_detailed_explanation: {str(e)}")
 
214
 
215
  if __name__ == "__main__":
216
  main()