Spaces:
Runtime error
Runtime error
| from urllib.parse import parse_qs, urlparse | |
| from bs4 import BeautifulSoup | |
| import requests | |
def scrapeGoogleSearch(query: str) -> str:
    """Fetch Google's HTML results page for *query* and format it as plain text.

    The output contains an optional "Verified Answer" section (taken from the
    answer box, if one is found) followed by up to ten "Search Results"
    entries, each with a title, description and link.

    Args:
        query: The search terms. Percent-encoding is handled here, so pass the
            raw, unencoded text.

    Returns:
        The formatted sections joined with newlines. An empty string is
        returned when the response status is not 200 or when the page has no
        ``<div id="main">`` container to parse.

    Note:
        Exceptions raised by ``requests.get`` (connection errors, timeouts)
        are not caught here and propagate to the caller.
    """
    lines = []

    # Let requests build the query string: interpolating the raw query into
    # the URL breaks on characters such as '&', '#', '+' or spaces.
    # The timeout keeps an unresponsive server from blocking the caller forever.
    response = requests.get(
        "https://www.google.com/search",
        params={"q": query},
        timeout=10,
    )
    if response.status_code != 200:
        print("Failed to retrieve search results.")
        return "\n".join(lines)

    soup = BeautifulSoup(response.text, 'html.parser')

    # Debugging aid kept from the original implementation: dump the parsed
    # page into the working directory. It is best-effort, so an unwritable
    # directory must not abort the search itself.
    try:
        with open('soup_dump.html', 'w', encoding='utf-8') as dump_file:
            dump_file.write(soup.prettify())
    except OSError as exc:
        print(f"Could not write soup_dump.html: {exc}")

    # NOTE(review): the CSS class names below presumably match the markup
    # Google serves to clients without a browser User-Agent; they are not a
    # stable API and should be re-checked against a fresh dump if parsing
    # starts returning nothing.
    main_div = soup.find('div', attrs={'id': 'main'})
    if main_div is None:
        # Without the main container there is nothing to parse (this used to
        # raise AttributeError on the first ``select_one`` call).
        print("Failed to retrieve search results.")
        return "\n".join(lines)

    answer_div = (
        main_div.select_one('div.PqksIc')
        or main_div.select_one('div.BNeawe.iBp4i')
    )
    if answer_div is not None:
        citation_tag = answer_div.select_one('sub.gMUaMb.r0bn4c.rQMQod')
        citation_date = citation_tag.text if citation_tag else ""
        # The citation date is embedded in the answer text; strip it out so it
        # can be reported on its own line.
        answer_text = answer_div.text.replace(citation_date, '').strip()
        citation_text = f"Citation Date: {citation_date}" if citation_date else ""
        lines.append(
            f"Verified Answer:\n====\n{answer_text}\n{citation_text}\n====\n\n"
        )

    results = main_div.select('div.egMi0.kCrYT')
    descriptions = main_div.select(
        'div.BNeawe.s3v9rd.AP7Wnd .BNeawe.s3v9rd.AP7Wnd:last-child'
    )
    if results:
        lines.append("Search Results:\n====\n")

    for index, result in enumerate(results[:10]):
        heading = result.find('h3')
        anchor = result.find('a')
        if heading is None or anchor is None:
            # Not a regular result card; skip it instead of crashing.
            continue

        # Result links are redirect URLs carrying the real target in the
        # ``q`` parameter; fall back to the raw href when it is absent.
        href = anchor.get('href', '')
        link = parse_qs(urlparse(href).query).get('q', [href])[0]

        # Descriptions are paired with results by position; there may be
        # fewer descriptions than results.
        desc = descriptions[index].text if index < len(descriptions) else ""

        lines.append(f"Title: {heading.text}")
        lines.append(f"Description: {desc}")
        lines.append(f"Link: {link}\n")

    return "\n".join(lines)