# gpu_price_tracker.py — Compute-and-Token-Watch
# Author: kz209 (commit 6639e76, "upload data")
import requests
from bs4 import BeautifulSoup
import pandas as pd
import random
import time
import os
import re
import statistics
from datetime import datetime
class RobustHardwareTracker:
    """Track NVIDIA GPU prices: hourly cloud rental rates and resale hardware prices.

    Scrapes getdeploying.com for cloud rates and eBay sold listings for
    hardware prices, rotating browser-like request headers. Falls back to
    preset estimates whenever scraping is blocked or yields no usable data.
    """

    def __init__(self):
        # Preset fallback prices, used when the crawler is completely blocked.
        self.fallback_prices = {
            "H100": "$28,500 - $32,000",
            "V100": "$350 - $650",
            "B300": "Contact Sales (Q4 2025)",
        }
        # Masquerade as real browser request headers (key fields like Accept
        # and Accept-Language included); one set is picked at random per request.
        self.headers_list = [{
            "User-Agent":
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
            "Accept":
            "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
            "Accept-Language": "en-US,en;q=0.9",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
            "Upgrade-Insecure-Requests": "1"
        }, {
            "User-Agent":
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15",
            "Accept":
            "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "Accept-Language": "en-US,en;q=0.9",
            "Connection": "keep-alive",
        }]

    def get_cloud_price(self, chip):
        """Get the cheapest hourly cloud rental price for *chip*.

        Returns:
            "$X.XX/hr" for the lowest price found, "N/A" for unknown chips,
            failed requests, or an unparseable page, "Sold Out" when the page
            parses but lists no prices, "Check Provider" on unexpected errors.
        """
        urls = {
            "B300":
            "https://getdeploying.com/reference/cloud-gpu/nvidia-dgx-b300",
            "H100": "https://getdeploying.com/reference/cloud-gpu/nvidia-h100",
            "V100": "https://getdeploying.com/reference/cloud-gpu/nvidia-v100",
        }
        try:
            url = urls.get(chip)
            if not url:
                return "N/A"
            h = random.choice(self.headers_list)
            resp = requests.get(url, headers=h, timeout=15)
            if resp.status_code != 200:
                return "N/A"
            soup = BeautifulSoup(resp.text, 'html.parser')
            # Parse GetDeploying pricing table.
            table = soup.find("table")
            if table is None:
                # Fix: a missing table means the layout changed or we were
                # blocked — report "N/A", not "Sold Out".
                return "N/A"
            prices = []
            for row in table.find_all("tr"):
                txt = row.get_text()
                if "$" not in txt:
                    continue
                # Regex compatible with $2.00, $2, and $1,000.00.
                match = re.search(r"\$([0-9,]+(?:\.[0-9]+)?)", txt)
                if match:
                    prices.append(float(match.group(1).replace(",", "")))
            if prices:
                return f"${min(prices):.2f}/hr"
            return "Sold Out"
        except Exception as e:
            print(str(e))
            return "Check Provider"

    def get_hardware_price(self, chip, search_query):
        """Get the median eBay sold price for *chip* using *search_query*.

        Returns:
            "$X,XXX.XX" median of up to 10 valid sold listings, the preset
            fallback string for B300 (not yet on the secondary market), or
            "<fallback> (Est)" when scraping fails or yields no samples.
        """
        if chip == "B300":
            # Not available on the secondary market yet.
            return self.fallback_prices["B300"]
        url = "https://www.ebay.com/sch/i.html"
        params = {
            "_nkw": search_query,
            "LH_Sold": "1",
            "LH_Complete": "1",
            "rt": "nc"
        }
        try:
            # Add random delay to simulate human operation.
            time.sleep(random.uniform(2.0, 4.0))
            h = random.choice(self.headers_list)
            resp = requests.get(url, params=params, headers=h, timeout=15)
            if resp.status_code != 200:
                # Fix: check HTTP status (consistent with get_cloud_price)
                # instead of parsing an error page.
                return f"{self.fallback_prices[chip]} (Est)"
            soup = BeautifulSoup(resp.text, 'html.parser')
            prices = []
            for tag in soup.select(".s-item__price"):
                text = tag.get_text(strip=True)
                # Exclude eBay's hidden "Shop on eBay" placeholder result.
                if "Shop on eBay" in text or not text:
                    continue
                # Price range ("$x to $y"): keep the lower bound. Match
                # " to " with spaces so "to" inside a word cannot split.
                if " to " in text:
                    text = text.split(" to ")[0]
                # Extract a valid amount from the listing text.
                match = re.search(r'([0-9,]+(?:\.[0-9]{2})?)', text)
                if match:
                    try:
                        val = float(match.group(1).replace(",", ""))
                    except ValueError:
                        continue
                    # Filter out outliers below $100 (usually accessories,
                    # manuals, or pure cooling fans).
                    if val > 100:
                        prices.append(val)
                # Stop after collecting 10 valid samples.
                if len(prices) >= 10:
                    break
            if not prices:
                return f"{self.fallback_prices[chip]} (Est)"
            median_val = statistics.median(prices)
            return f"${median_val:,.2f}"
        except Exception as e:
            print(str(e))
            return f"{self.fallback_prices[chip]} (Est)"

    def collect_data(self):
        """Fetch cloud and hardware prices for every tracked chip.

        Returns:
            list[dict]: one row per chip with Date, Chip, Cloud Rent (/hr),
            and Hardware Price keys, ready for DataFrame construction.
        """
        inventory = [
            {
                "Code": "B300",
                "Name": "Blackwell B300",
                "Query": "NVIDIA B300 GPU"
            },
            {
                "Code": "H100",
                "Name": "Hopper H100",
                "Query": "NVIDIA H100 PCIe 80GB"
            },
            {
                "Code": "V100",
                "Name": "Volta V100",
                "Query": "NVIDIA Tesla V100 16GB PCIe"
            },
        ]
        results = []
        current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        print(f"Fetching data [{current_time}]...")
        for item in inventory:
            rent = self.get_cloud_price(item["Code"])
            buy = self.get_hardware_price(item["Code"], item["Query"])
            results.append({
                "Date": current_time,
                "Chip": item["Name"],
                "Cloud Rent (/hr)": rent,
                "Hardware Price": buy
            })
            print(f" -> Fetched {item['Name']}")
        return results
def save_to_csv(new_data, filename="./gpu_price_history.csv"):
    """Append *new_data* rows to the CSV at *filename*, creating it if absent.

    Args:
        new_data: list of dicts (rows) as produced by collect_data().
        filename: target CSV path; history accumulates across runs.
    """
    new_df = pd.DataFrame(new_data)
    if os.path.exists(filename):
        try:
            existing_df = pd.read_csv(filename)
            combined_df = pd.concat([existing_df, new_df], ignore_index=True)
            combined_df.to_csv(filename, index=False)
            # Fix: the messages printed the literal "(unknown)" — restore the
            # filename placeholder in both f-strings.
            print(f"Successfully appended data to {filename}")
        except Exception as e:
            # Existing file unreadable/corrupt: overwrite with the new rows
            # rather than losing this run's data.
            print(f"Error writing to CSV: {e}")
            new_df.to_csv(filename, index=False)
    else:
        new_df.to_csv(filename, index=False)
        print(f"New file created at {filename}")
if __name__ == "__main__":
    # Entry point: scrape current prices and append them to the history CSV.
    tracker = RobustHardwareTracker()
    save_to_csv(tracker.collect_data())