Update app.py
Browse files
app.py
CHANGED
|
@@ -148,6 +148,8 @@ def google_search(term, num_results=5, lang="en", timeout=5, safe="active", ssl_
|
|
| 148 |
all_results = []
|
| 149 |
max_chars_per_page = 8000 # Limit the number of characters from each webpage to stay under the token limit
|
| 150 |
|
|
|
|
|
|
|
| 151 |
with requests.Session() as session:
|
| 152 |
while start < num_results:
|
| 153 |
try:
|
|
@@ -169,17 +171,23 @@ def google_search(term, num_results=5, lang="en", timeout=5, safe="active", ssl_
|
|
| 169 |
verify=ssl_verify,
|
| 170 |
)
|
| 171 |
resp.raise_for_status()
|
|
|
|
| 172 |
except requests.exceptions.RequestException as e:
|
|
|
|
| 173 |
break
|
| 174 |
|
| 175 |
soup = BeautifulSoup(resp.text, "html.parser")
|
| 176 |
result_block = soup.find_all("div", attrs={"class": "g"})
|
| 177 |
if not result_block:
|
|
|
|
| 178 |
break
|
|
|
|
|
|
|
| 179 |
for result in result_block:
|
| 180 |
link = result.find("a", href=True)
|
| 181 |
if link:
|
| 182 |
link = link["href"]
|
|
|
|
| 183 |
try:
|
| 184 |
webpage = session.get(link, headers=headers, timeout=timeout)
|
| 185 |
webpage.raise_for_status()
|
|
@@ -187,11 +195,26 @@ def google_search(term, num_results=5, lang="en", timeout=5, safe="active", ssl_
|
|
| 187 |
if len(visible_text) > max_chars_per_page:
|
| 188 |
visible_text = visible_text[:max_chars_per_page] + "..."
|
| 189 |
all_results.append({"link": link, "text": visible_text})
|
|
|
|
| 190 |
except requests.exceptions.RequestException as e:
|
|
|
|
| 191 |
all_results.append({"link": link, "text": None})
|
| 192 |
else:
|
|
|
|
| 193 |
all_results.append({"link": None, "text": None})
|
| 194 |
start += len(result_block)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 195 |
return all_results
|
| 196 |
|
| 197 |
def ask_question(question, temperature, top_p, repetition_penalty, web_search):
|
|
|
|
| 148 |
all_results = []
|
| 149 |
max_chars_per_page = 8000 # Limit the number of characters from each webpage to stay under the token limit
|
| 150 |
|
| 151 |
+
print(f"Starting Google search for term: '{term}'")
|
| 152 |
+
|
| 153 |
with requests.Session() as session:
|
| 154 |
while start < num_results:
|
| 155 |
try:
|
|
|
|
| 171 |
verify=ssl_verify,
|
| 172 |
)
|
| 173 |
resp.raise_for_status()
|
| 174 |
+
print(f"Successfully retrieved search results page (start={start})")
|
| 175 |
except requests.exceptions.RequestException as e:
|
| 176 |
+
print(f"Error retrieving search results: {e}")
|
| 177 |
break
|
| 178 |
|
| 179 |
soup = BeautifulSoup(resp.text, "html.parser")
|
| 180 |
result_block = soup.find_all("div", attrs={"class": "g"})
|
| 181 |
if not result_block:
|
| 182 |
+
print("No results found on this page")
|
| 183 |
break
|
| 184 |
+
|
| 185 |
+
print(f"Found {len(result_block)} results on this page")
|
| 186 |
for result in result_block:
|
| 187 |
link = result.find("a", href=True)
|
| 188 |
if link:
|
| 189 |
link = link["href"]
|
| 190 |
+
print(f"Processing link: {link}")
|
| 191 |
try:
|
| 192 |
webpage = session.get(link, headers=headers, timeout=timeout)
|
| 193 |
webpage.raise_for_status()
|
|
|
|
| 195 |
if len(visible_text) > max_chars_per_page:
|
| 196 |
visible_text = visible_text[:max_chars_per_page] + "..."
|
| 197 |
all_results.append({"link": link, "text": visible_text})
|
| 198 |
+
print(f"Successfully extracted text from {link}")
|
| 199 |
except requests.exceptions.RequestException as e:
|
| 200 |
+
print(f"Error retrieving webpage content: {e}")
|
| 201 |
all_results.append({"link": link, "text": None})
|
| 202 |
else:
|
| 203 |
+
print("No link found for this result")
|
| 204 |
all_results.append({"link": None, "text": None})
|
| 205 |
start += len(result_block)
|
| 206 |
+
|
| 207 |
+
print(f"Search completed. Total results: {len(all_results)}")
|
| 208 |
+
print("Search results:")
|
| 209 |
+
for i, result in enumerate(all_results, 1):
|
| 210 |
+
print(f"Result {i}:")
|
| 211 |
+
print(f" Link: {result['link']}")
|
| 212 |
+
if result['text']:
|
| 213 |
+
print(f" Text: {result['text'][:100]}...") # Print first 100 characters
|
| 214 |
+
else:
|
| 215 |
+
print(" Text: None")
|
| 216 |
+
print("End of search results")
|
| 217 |
+
|
| 218 |
return all_results
|
| 219 |
|
| 220 |
def ask_question(question, temperature, top_p, repetition_penalty, web_search):
|