Spaces:
Running
Running
| import requests | |
| from bs4 import BeautifulSoup | |
| import pandas as pd | |
| import random | |
| import time | |
| import os | |
| import re | |
| import statistics | |
| from datetime import datetime | |
class RobustHardwareTracker:
    """Collect GPU cloud-rental and second-hand hardware prices by scraping.

    Cloud rental prices are read from the first HTML table on the chip's
    GetDeploying reference page; hardware prices are the median of recent
    eBay sold/completed listings.  Every lookup is best-effort: network or
    parsing failures are printed and replaced by a placeholder string, so
    the public methods never raise for scraping problems.

    Args:
        delay_range: ``(low, high)`` bounds, in seconds, of the random pause
            taken before each eBay request.  Defaults to the original
            2-4 second window.
    """

    # Dollar amount directly after a "$": $2, $2.00, $1,000.00.  A leading
    # digit is required so a stray "$," cannot produce an empty number.
    _CLOUD_PRICE_RE = re.compile(r"\$([0-9][0-9,]*(?:\.[0-9]+)?)")
    # First amount found in an eBay price label, e.g. "1,234.56".
    _EBAY_PRICE_RE = re.compile(r"([0-9][0-9,]*(?:\.[0-9]{2})?)")

    _CLOUD_URLS = {
        "B300": "https://getdeploying.com/reference/cloud-gpu/nvidia-dgx-b300",
        "H100": "https://getdeploying.com/reference/cloud-gpu/nvidia-h100",
        "V100": "https://getdeploying.com/reference/cloud-gpu/nvidia-v100",
    }
    _EBAY_SEARCH_URL = "https://www.ebay.com/sch/i.html"

    def __init__(self, delay_range=(2.0, 4.0)):
        self.delay_range = delay_range
        # Preset fallback prices, used when the crawler is completely blocked
        self.fallback_prices = {
            "H100": "$28,500 - $32,000",
            "V100": "$350 - $650",
            "B300": "Contact Sales (Q4 2025)",
        }
        # Browser-like request headers; one profile is picked at random per
        # request.  "br" is deliberately NOT advertised in Accept-Encoding:
        # requests only decodes Brotli when an optional brotli package is
        # installed, and an undecoded body would be parsed as garbage.
        self.headers_list = [
            {
                "User-Agent":
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
                "Accept":
                "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
                "Accept-Language": "en-US,en;q=0.9",
                "Accept-Encoding": "gzip, deflate",
                "Connection": "keep-alive",
                "Upgrade-Insecure-Requests": "1",
            },
            {
                "User-Agent":
                "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15",
                "Accept":
                "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
                "Accept-Language": "en-US,en;q=0.9",
                "Connection": "keep-alive",
            },
        ]

    @classmethod
    def _parse_cloud_prices(cls, row_texts):
        """Return the first dollar amount found in each table-row text."""
        prices = []
        for text in row_texts:
            match = cls._CLOUD_PRICE_RE.search(text)
            if match:
                prices.append(float(match.group(1).replace(",", "")))
        return prices

    @classmethod
    def _parse_ebay_prices(cls, price_texts, min_price=100, max_samples=10):
        """Return up to ``max_samples`` listing prices above ``min_price``."""
        prices = []
        for text in price_texts:
            # Exclude the hidden "Shop on eBay" placeholder and empty labels
            if not text or "Shop on eBay" in text:
                continue
            # Price range ("$350.00 to $650.00"): keep the lower bound
            if "to" in text:
                text = text.split("to")[0]
            match = cls._EBAY_PRICE_RE.search(text)
            if not match:
                continue
            value = float(match.group(1).replace(",", ""))
            # Amounts of $100 or less are usually accessories, manuals or fans
            if value > min_price:
                prices.append(value)
                if len(prices) >= max_samples:
                    break
        return prices

    def _estimated_fallback(self, chip):
        """Return the preset price tagged "(Est)", or "N/A" if none exists."""
        preset = self.fallback_prices.get(chip)
        return f"{preset} (Est)" if preset else "N/A"

    def get_cloud_price(self, chip):
        """Get the lowest cloud rental price listed for ``chip``.

        Returns:
            ``"$X.XX/hr"`` for the cheapest table row; ``"N/A"`` for an
            unknown chip or a non-200 response; ``"Sold Out"`` when the page
            has no table or no row with a price; ``"Check Provider"`` when
            the request or parsing raised.
        """
        url = self._CLOUD_URLS.get(chip)
        if not url:
            return "N/A"
        try:
            headers = random.choice(self.headers_list)
            resp = requests.get(url, headers=headers, timeout=15)
            if resp.status_code != 200:
                return "N/A"
            soup = BeautifulSoup(resp.text, "html.parser")
            # Only the first table on the page is inspected
            table = soup.find("table")
            if table is not None:
                row_texts = (row.get_text() for row in table.find_all("tr"))
                prices = self._parse_cloud_prices(row_texts)
                if prices:
                    return f"${min(prices):.2f}/hr"
            return "Sold Out"
        except Exception as e:
            # Best-effort boundary: report and degrade instead of crashing
            print(str(e))
            return "Check Provider"

    def get_hardware_price(self, chip, search_query):
        """Get the median eBay sold price for ``search_query``.

        ``"B300"`` is never searched; its preset text is returned as-is.
        For other chips the result is ``"$X,XXX.XX"`` (median of up to ten
        sold listings above $100).  If the request fails or yields no usable
        price, the preset for ``chip`` is returned with an ``" (Est)"``
        suffix, or ``"N/A"`` when the chip has no preset.
        """
        if chip == "B300":
            return self.fallback_prices["B300"]

        params = {
            "_nkw": search_query,
            "LH_Sold": "1",
            "LH_Complete": "1",
            "rt": "nc",
        }
        try:
            # Add random delay to simulate human operation
            time.sleep(random.uniform(*self.delay_range))
            headers = random.choice(self.headers_list)
            resp = requests.get(self._EBAY_SEARCH_URL, params=params,
                                headers=headers, timeout=15)
            soup = BeautifulSoup(resp.text, "html.parser")
            price_texts = (tag.get_text(strip=True)
                           for tag in soup.select(".s-item__price"))
            prices = self._parse_ebay_prices(price_texts)
        except Exception as e:
            # Best-effort boundary: report and fall back to the preset
            print(str(e))
            return self._estimated_fallback(chip)

        if not prices:
            return self._estimated_fallback(chip)
        return f"${statistics.median(prices):,.2f}"

    def collect_data(self):
        """Fetch rental and hardware prices for every tracked chip.

        Returns:
            A list with one dict per chip, keyed ``"Date"``, ``"Chip"``,
            ``"Cloud Rent (/hr)"`` and ``"Hardware Price"``.  All rows share
            the timestamp taken when collection started.
        """
        inventory = [
            {
                "Code": "B300",
                "Name": "Blackwell B300",
                "Query": "NVIDIA B300 GPU",
            },
            {
                "Code": "H100",
                "Name": "Hopper H100",
                "Query": "NVIDIA H100 PCIe 80GB",
            },
            {
                "Code": "V100",
                "Name": "Volta V100",
                "Query": "NVIDIA Tesla V100 16GB PCIe",
            },
        ]
        results = []
        current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        print(f"Fetching data [{current_time}]...")
        for item in inventory:
            rent = self.get_cloud_price(item["Code"])
            buy = self.get_hardware_price(item["Code"], item["Query"])
            results.append({
                "Date": current_time,
                "Chip": item["Name"],
                "Cloud Rent (/hr)": rent,
                "Hardware Price": buy,
            })
            print(f" -> Fetched {item['Name']}")
        return results
def save_to_csv(new_data, filename="./gpu_price_history.csv"):
    """Append ``new_data`` rows to the price-history CSV at ``filename``.

    Args:
        new_data: Records accepted by ``pd.DataFrame`` (here: a list of
            row dicts).
        filename: Path of the history CSV; created if it does not exist.

    Behavior:
        * Empty ``new_data`` leaves the file untouched.
        * A missing or zero-byte file is (re)written with the new rows only.
        * If the existing file cannot be read, it is moved aside to
          ``<filename>.<timestamp>.bak`` before the new rows are written, so
          earlier history is never silently overwritten.

    Errors raised while writing the CSV propagate to the caller.
    """
    new_df = pd.DataFrame(new_data)
    if new_df.empty:
        # Writing an empty frame would create a header-less, unreadable CSV
        print(f"No new data to save; {filename} left unchanged")
        return

    if not os.path.exists(filename):
        new_df.to_csv(filename, index=False)
        print(f"New file created at {filename}")
        return

    try:
        existing_df = pd.read_csv(filename)
    except pd.errors.EmptyDataError:
        # Zero-byte file: nothing to preserve
        existing_df = None
        print(f"Existing file {filename} was empty; starting fresh")
    except Exception as e:
        # Keep the unreadable history instead of clobbering it
        backup = f"{filename}.{datetime.now():%Y%m%d%H%M%S}.bak"
        os.replace(filename, backup)
        existing_df = None
        print(f"Error reading existing CSV: {e}; moved it to {backup}")

    if existing_df is None:
        new_df.to_csv(filename, index=False)
        print(f"New file created at {filename}")
        return

    combined_df = pd.concat([existing_df, new_df], ignore_index=True)
    combined_df.to_csv(filename, index=False)
    print(f"Successfully appended data to {filename}")
if __name__ == "__main__":
    # Script entry point: take one price snapshot and persist it to the
    # history CSV at save_to_csv's default path.
    price_tracker = RobustHardwareTracker()
    snapshot = price_tracker.collect_data()
    save_to_csv(snapshot)