# worth-brain / agents / deals.py
# Last commit by MightyOctopus: "Add type hint for the fetch method" (42b564f)
from pydantic import BaseModel, Field
from typing import List, Dict, Self, Any
from bs4 import BeautifulSoup
import re
import feedparser
from tqdm import tqdm
import requests
import time
import ssl
# Works around the SSL issue hit when feedparser accesses the feeds: the
# default HTTPS context factory is replaced by the unverified one.
# NOTE(review): this is a process-wide switch - code that relies on the
# default HTTPS context skips certificate verification once this has run.
try:
    ssl._create_default_https_context = ssl._create_unverified_context
except AttributeError:
    # This interpreter exposes no unverified-context factory; keep the default.
    pass
# RSS feed URLs that ScrapedDeal.fetch() walks, in this order.
feeds = [
    "https://www.dealnews.com/c142/Electronics/?rss=1",
    "https://www.dealnews.com/c39/Computers/?rss=1",
    "https://www.dealnews.com/f1912/Smart-Home/?rss=1",
]
# Further feeds that could be appended to the list above:
#   "https://www.dealnews.com/c238/Automotive/?rss=1"
#   "https://www.dealnews.com/c196/Home-Garden/?rss=1"
def extract(html_snippet: str) -> str:
    """
    Reduce an HTML fragment to plain text using Beautiful Soup.

    If the fragment contains a <div class="snippet summary">, only that div's
    text is used; otherwise the text of the whole fragment is used.
    :param html_snippet: HTML markup to convert into clean text.
    :return: the stripped text, with newline characters replaced by spaces.
    """
    parsed = BeautifulSoup(html_snippet, features="html.parser")
    summary_div = parsed.find("div", class_="snippet summary")
    # Fall back to the whole document when the summary div is absent.
    text_source = summary_div or parsed
    return text_source.get_text(strip=True).replace("\n", " ")
class ScrapedDeal:
    """
    A class to represent a Deal retrieved from an RSS feed

    Creating an instance downloads the deal's web page (one HTTP GET) to read
    the product details, so construction is slow and can raise the network
    errors that requests raises.
    """

    # Seconds to wait for a deal page. Without a timeout requests.get() can
    # block indefinitely on an unresponsive server and stall the whole fetch.
    REQUEST_TIMEOUT = 10.0

    category: str  # NOTE(review): annotated but never assigned in this class
    title: str  # entry title, cut to 100 characters by truncate()
    summary: str  # plain text produced by extract() from the entry summary
    url: str  # href of the entry's first link
    details: str  # page text before the first "Features" marker (<= 500 chars)
    features: str  # page text after the first "Features" marker (<= 500 chars)

    def __init__(self, entry: Dict[str, Any]):
        """
        Populate this instance based on the provided dict

        :param entry: a feed entry providing "title", "summary" and
            "links" (a sequence whose first item has an "href").
        :raises KeyError: if one of those keys is missing from the entry.
        :raises requests.RequestException: if the deal page cannot be
            downloaded within REQUEST_TIMEOUT seconds.
        """
        self.title = entry["title"]
        self.summary = extract(entry["summary"])
        self.url = entry["links"][0]["href"]

        # Get text at the page level (for product details)
        response = requests.get(self.url, timeout=self.REQUEST_TIMEOUT)
        soup = BeautifulSoup(response.content, "html.parser")
        section = soup.find("div", class_="content-section")
        # find() returns None when the page has no such div; treat that as
        # "no details" rather than failing with AttributeError on None.
        content = section.get_text() if section is not None else ""

        self.details, self.features = self._split_content(content)
        self.truncate()

    @staticmethod
    def _split_content(content: str) -> tuple[str, str]:
        """
        Flatten the page text and split it into (details, features).

        :param content: raw text of the page's content section.
        :return: the text before and after the first "Features" marker; the
            second item is "" when the marker does not occur.
        """
        flattened = content.replace("\nmore", "").replace("\n", " ")
        # partition() yields (flattened, "", "") when the marker is missing,
        # which is exactly the "no features" case.
        details, _, features = flattened.partition("Features")
        return details, features

    def truncate(self):
        """
        Set a text limit to the title, details, features of the content
        :return: None
        """
        self.title = self.title[:100]
        self.details = self.details[:500]
        self.features = self.features[:500]

    def __repr__(self):
        """
        Return a string to describe this deal
        """
        return f"<{self.title}>"

    def describe(self):
        """
        Return a longer string to describe this deal for use in calling a model
        """
        return f"Title: {self.title}\n\nDetails: {self.details.strip()}\n\nFeatures: {self.features.strip()}\n\nURL: {self.url}"

    @classmethod
    def fetch(cls, show_progress: bool = False) -> List["ScrapedDeal"]:  # forward reference
        """
        Retrieve all deals from the selected RSS feeds

        :param show_progress: wrap the feed list in a tqdm progress bar.
        :return: one instance per entry, at most 10 entries per feed, in
            feed order.
        """
        deals = []
        feed_iter = tqdm(feeds) if show_progress else feeds
        for feed_url in feed_iter:
            feed = feedparser.parse(
                feed_url,
                request_headers={
                    "User-Agent": "Mozilla/5.0 (compatible; DealFetcher/1.0)",
                },
            )
            for entry in feed["entries"][:10]:
                deals.append(cls(entry))
                # Pause after each page download - presumably to avoid
                # hammering the site with back-to-back requests.
                time.sleep(0.5)
        return deals
class Deal(BaseModel):
    """
    A class to Represent a Deal with a summary description
    """

    # NOTE(review): the docstring above and the Field descriptions below read
    # as instructions to whatever fills in this model; pydantic may surface
    # them in the generated schema, so their wording is kept unchanged.

    # Free-text summary of the product itself.
    product_description: str = Field(
        description=(
            "Your clearly expressed summary of the product in 3-4 sentences. "
            "Details of the item are much more important than why it's a good deal. "
            "Avoid mentioning discounts and coupons; focus on the item itself. "
            "There should be a short paragraph of text for each item you choose."
        )
    )
    # Final advertised price as a number.
    price: float = Field(
        description=(
            "The offered price of this product, as advertised in the deal. "
            "Be sure to give the actual price; for example, if a deal is described "
            "as $100 off the usual $300 price, you should respond with $200"
        )
    )
    # Link to the deal, copied from the input.
    url: str = Field(
        description="The URL of the deal, as provided in the input"
    )
class DealSelection(BaseModel):
    """
    A class to Represent a list of Deals
    """

    # NOTE(review): the description is worded as a selection instruction and
    # may be surfaced in the generated schema, so it is kept unchanged.
    deals: List[Deal] = Field(
        description=(
            "Your selection of the 5 deals that have the most detailed, high quality "
            "description and the most clear price. "
            "You should be confident that the price reflects the deal, that it is a "
            "good deal, with a clear description"
        )
    )
class Opportunity(BaseModel):
    """
    A class to represent a possible opportunity: a Deal where we estimate
    it should cost more than it's being offered
    """

    # The deal this opportunity refers to.
    deal: Deal
    # Estimated price of the item (what we think it should cost).
    estimate: float
    # NOTE(review): presumably estimate minus deal.price - the computation is
    # not visible in this file; confirm against the code that builds this model.
    discount: float