Spaces:
Sleeping
Sleeping
| import requests | |
| from bs4 import BeautifulSoup | |
| import json | |
| excluded_urls = ["finance.yahoo.com", "google.com/finance"] | |
| def search_duckduckgo(keywords): | |
| url = f"https://duckduckgo.com/html/?q={'+'.join(keywords)}" | |
| headers = { | |
| "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3" | |
| } | |
| try: | |
| response = requests.get(url, headers=headers) | |
| response.raise_for_status() | |
| return response.text | |
| except requests.exceptions.RequestException as e: | |
| print(f"Error fetching search results: {e}") | |
| return "" | |
| def parse_results(html, keywords): | |
| soup = BeautifulSoup(html, "html.parser") | |
| results = soup.select(".result") | |
| parsed_results = [] | |
| for result in results: | |
| try: | |
| link = result.select_one(".result__a").get( | |
| "href" | |
| ) # Changed selector for link | |
| title = result.select_one(".result__a").text # Changed selector for title | |
| description = result.select_one( | |
| ".result__snippet" | |
| ) # Kept the same selector for description | |
| if description: | |
| description = description.text | |
| else: | |
| description = "" | |
| result_data = { | |
| "Link": link, | |
| "Title": title, | |
| "Description": description, | |
| } | |
| # Check if the link is not in excluded URLs | |
| if not any(excluded_url in link for excluded_url in excluded_urls): | |
| # Check if any keyword is in title, description, or link | |
| if any( | |
| keyword.lower() in result_data["Title"].lower() | |
| or keyword.lower() in result_data["Description"].lower() | |
| or keyword.lower() in result_data["Link"].lower() | |
| for keyword in keywords | |
| ): | |
| print(result_data) | |
| parsed_results.append(result_data) | |
| except Exception as e: | |
| print(f"Error parsing result: {e}") | |
| return parsed_results | |
| # keywords = ["tatasteel", "finance", "news"] | |
| def perform_search(keywords): | |
| html = search_duckduckgo(keywords) | |
| if html: | |
| results = parse_results(html, keywords) | |
| if results: | |
| with open("results.json", "w", encoding="utf-8") as f: | |
| json.dump(results, f, ensure_ascii=False, indent=4) | |
| else: | |
| print("No results found.") | |
| else: | |
| print("Failed to fetch search results.") | |
| # if __name__ == "__main__": | |
| # perform_search(keywords = keywords) | |