import requests
from bs4 import BeautifulSoup
import pandas as pd
import random
import time
import os
import re
import statistics
from datetime import datetime


class RobustHardwareTracker:
    """Tracks NVIDIA datacenter GPU pricing from two public sources.

    - Cloud rental rates (per hour) scraped from getdeploying.com
    - Secondhand hardware prices (median of sold listings) scraped from eBay

    When scraping is blocked or yields nothing, preset fallback price
    ranges are reported instead, tagged "(Est)".
    """

    def __init__(self):
        # Preset fallback prices, used when the crawler is completely blocked
        self.fallback_prices = {
            "H100": "$28,500 - $32,000",
            "V100": "$350 - $650",
            "B300": "Contact Sales (Q4 2025)",
        }
        # Masquerade as real browser request headers (key fields like
        # Accept and Accept-Language included to look less bot-like).
        # One entry is picked at random per request.
        self.headers_list = [
            {
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
                "Accept-Language": "en-US,en;q=0.9",
                "Accept-Encoding": "gzip, deflate, br",
                "Connection": "keep-alive",
                "Upgrade-Insecure-Requests": "1",
            },
            {
                "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15",
                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
                "Accept-Language": "en-US,en;q=0.9",
                "Connection": "keep-alive",
            },
        ]

    def get_cloud_price(self, chip):
        """Return the lowest cloud rental price for *chip* as "$X.XX/hr".

        Returns "N/A" for unknown chips or HTTP failures, "Sold Out" when
        the page has no parseable prices, and "Check Provider" on any
        unexpected error (network timeout, parse crash, ...).
        """
        urls = {
            "B300": "https://getdeploying.com/reference/cloud-gpu/nvidia-dgx-b300",
            "H100": "https://getdeploying.com/reference/cloud-gpu/nvidia-h100",
            "V100": "https://getdeploying.com/reference/cloud-gpu/nvidia-v100",
        }
        try:
            url = urls.get(chip)
            if not url:
                return "N/A"
            h = random.choice(self.headers_list)
            resp = requests.get(url, headers=h, timeout=15)
            if resp.status_code != 200:
                return "N/A"
            soup = BeautifulSoup(resp.text, 'html.parser')
            # Parse the GetDeploying provider comparison table
            table = soup.find("table")
            if table:
                rows = table.find_all("tr")
                prices = []
                for row in rows:
                    txt = row.get_text()
                    if "$" in txt:
                        # Regex is compatible with $2.00, $2, and $1,000.00
                        match = re.search(r"\$([0-9,]+(?:\.[0-9]+)?)", txt)
                        if match:
                            clean_price = float(match.group(1).replace(",", ""))
                            prices.append(clean_price)
                if prices:
                    return f"${min(prices):.2f}/hr"
            # No table or no dollar amounts found: treat as unavailable.
            # (Guaranteed string return here — avoids an implicit None.)
            return "Sold Out"
        except Exception as e:
            print(str(e))
            return "Check Provider"

    def get_hardware_price(self, chip, search_query):
        """Return the median eBay sold price for *search_query* as "$X,XXX.XX".

        B300 short-circuits to its fallback text (not yet on the resale
        market). On scrape failure or zero valid samples, returns the
        preset fallback range tagged "(Est)".
        """
        if chip == "B300":
            return self.fallback_prices["B300"]
        url = "https://www.ebay.com/sch/i.html"
        # LH_Sold/LH_Complete restrict results to completed, sold listings
        params = {
            "_nkw": search_query,
            "LH_Sold": "1",
            "LH_Complete": "1",
            "rt": "nc",
        }
        try:
            # Add random delay to simulate human operation
            time.sleep(random.uniform(2.0, 4.0))
            h = random.choice(self.headers_list)
            resp = requests.get(url, params=params, headers=h, timeout=15)
            soup = BeautifulSoup(resp.text, 'html.parser')
            price_tags = soup.select(".s-item__price")
            prices = []
            for tag in price_tags:
                text = tag.get_text(strip=True)
                # Exclude the first hidden placeholder ("Shop on eBay")
                # that eBay injects into search results
                if "Shop on eBay" in text or not text:
                    continue
                # Handle a price range ("$X to $Y") by taking the low end
                if "to" in text:
                    text = text.split("to")[0]
                # Extract a valid amount from the remaining text
                match = re.search(r'([0-9,]+(?:\.[0-9]{2})?)', text)
                if match:
                    try:
                        val = float(match.group(1).replace(",", ""))
                        # Filter out outliers below $100 (usually
                        # accessories, manuals, or pure cooling fans)
                        if val > 100:
                            prices.append(val)
                    except ValueError:
                        continue
                # Stop after collecting 10 valid samples
                if len(prices) >= 10:
                    break
            if not prices:
                return f"{self.fallback_prices[chip]} (Est)"
            # Median is robust against the remaining outliers
            median_val = statistics.median(prices)
            return f"${median_val:,.2f}"
        except Exception as e:
            print(str(e))
            return f"{self.fallback_prices[chip]} (Est)"

    def collect_data(self):
        """Fetch rent + resale prices for every tracked chip.

        Returns a list of dicts ready for DataFrame construction, one per
        chip, each stamped with the collection timestamp.
        """
        inventory = [
            {"Code": "B300", "Name": "Blackwell B300", "Query": "NVIDIA B300 GPU"},
            {"Code": "H100", "Name": "Hopper H100", "Query": "NVIDIA H100 PCIe 80GB"},
            {"Code": "V100", "Name": "Volta V100", "Query": "NVIDIA Tesla V100 16GB PCIe"},
        ]
        results = []
        current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        print(f"Fetching data [{current_time}]...")
        for item in inventory:
            rent = self.get_cloud_price(item["Code"])
            buy = self.get_hardware_price(item["Code"], item["Query"])
            results.append({
                "Date": current_time,
                "Chip": item["Name"],
                "Cloud Rent (/hr)": rent,
                "Hardware Price": buy,
            })
            print(f" -> Fetched {item['Name']}")
        return results


def save_to_csv(new_data, filename="./gpu_price_history.csv"):
    """Append *new_data* rows to the CSV at *filename*, creating it if absent.

    On a read/merge failure of the existing file, falls back to writing
    only the new rows (NOTE: this overwrites the file and drops history —
    deliberate best-effort behavior so a corrupt file never blocks a run).
    """
    new_df = pd.DataFrame(new_data)
    if os.path.exists(filename):
        try:
            existing_df = pd.read_csv(filename)
            combined_df = pd.concat([existing_df, new_df], ignore_index=True)
            combined_df.to_csv(filename, index=False)
            # Fixed: report the actual target path (was a garbled literal)
            print(f"Successfully appended data to {filename}")
        except Exception as e:
            print(f"Error writing to CSV: {e}")
            new_df.to_csv(filename, index=False)
    else:
        new_df.to_csv(filename, index=False)
        # Fixed: report the actual target path (was a garbled literal)
        print(f"New file created at {filename}")


if __name__ == "__main__":
    tracker = RobustHardwareTracker()
    data = tracker.collect_data()
    save_to_csv(data)