# gpu_price_tracker.py — Compute-and-Token-Watch
# Author: kz209 (commit 6639e76, "upload data")
import requests
from bs4 import BeautifulSoup
import pandas as pd
import random
import time
import os
import re
import statistics
from datetime import datetime
class RobustHardwareTracker:
    """Track NVIDIA GPU prices: hourly cloud rental rates and resale hardware prices.

    Scrapes getdeploying.com for cloud rates and eBay sold listings for
    hardware prices, rotating browser-like request headers. Falls back to
    preset estimates whenever scraping is blocked or yields no usable data.
    """

    def __init__(self):
        # Preset fallback prices, used when the crawler is completely blocked.
        self.fallback_prices = {
            "H100": "$28,500 - $32,000",
            "V100": "$350 - $650",
            "B300": "Contact Sales (Q4 2025)",
        }
        # Masquerade as real browser request headers (key fields like Accept
        # and Accept-Language included); one set is picked at random per request.
        self.headers_list = [{
            "User-Agent":
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
            "Accept":
            "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
            "Accept-Language": "en-US,en;q=0.9",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
            "Upgrade-Insecure-Requests": "1"
        }, {
            "User-Agent":
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15",
            "Accept":
            "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "Accept-Language": "en-US,en;q=0.9",
            "Connection": "keep-alive",
        }]

    def get_cloud_price(self, chip):
        """Get the cheapest hourly cloud rental price for *chip*.

        Returns:
            "$X.XX/hr" for the lowest price found, "N/A" for unknown chips,
            failed requests, or an unparseable page, "Sold Out" when the page
            parses but lists no prices, "Check Provider" on unexpected errors.
        """
        urls = {
            "B300":
            "https://getdeploying.com/reference/cloud-gpu/nvidia-dgx-b300",
            "H100": "https://getdeploying.com/reference/cloud-gpu/nvidia-h100",
            "V100": "https://getdeploying.com/reference/cloud-gpu/nvidia-v100",
        }
        try:
            url = urls.get(chip)
            if not url:
                return "N/A"
            h = random.choice(self.headers_list)
            resp = requests.get(url, headers=h, timeout=15)
            if resp.status_code != 200:
                return "N/A"
            soup = BeautifulSoup(resp.text, 'html.parser')
            # Parse GetDeploying pricing table.
            table = soup.find("table")
            if table is None:
                # Fix: a missing table means the layout changed or we were
                # blocked — report "N/A", not "Sold Out".
                return "N/A"
            prices = []
            for row in table.find_all("tr"):
                txt = row.get_text()
                if "$" not in txt:
                    continue
                # Regex compatible with $2.00, $2, and $1,000.00.
                match = re.search(r"\$([0-9,]+(?:\.[0-9]+)?)", txt)
                if match:
                    prices.append(float(match.group(1).replace(",", "")))
            if prices:
                return f"${min(prices):.2f}/hr"
            return "Sold Out"
        except Exception as e:
            print(str(e))
            return "Check Provider"

    def get_hardware_price(self, chip, search_query):
        """Get the median eBay sold price for *chip* using *search_query*.

        Returns:
            "$X,XXX.XX" median of up to 10 valid sold listings, the preset
            fallback string for B300 (not yet on the secondary market), or
            "<fallback> (Est)" when scraping fails or yields no samples.
        """
        if chip == "B300":
            # Not available on the secondary market yet.
            return self.fallback_prices["B300"]
        url = "https://www.ebay.com/sch/i.html"
        params = {
            "_nkw": search_query,
            "LH_Sold": "1",
            "LH_Complete": "1",
            "rt": "nc"
        }
        try:
            # Add random delay to simulate human operation.
            time.sleep(random.uniform(2.0, 4.0))
            h = random.choice(self.headers_list)
            resp = requests.get(url, params=params, headers=h, timeout=15)
            if resp.status_code != 200:
                # Fix: check HTTP status (consistent with get_cloud_price)
                # instead of parsing an error page.
                return f"{self.fallback_prices[chip]} (Est)"
            soup = BeautifulSoup(resp.text, 'html.parser')
            prices = []
            for tag in soup.select(".s-item__price"):
                text = tag.get_text(strip=True)
                # Exclude eBay's hidden "Shop on eBay" placeholder result.
                if "Shop on eBay" in text or not text:
                    continue
                # Price range ("$x to $y"): keep the lower bound. Match
                # " to " with spaces so "to" inside a word cannot split.
                if " to " in text:
                    text = text.split(" to ")[0]
                # Extract a valid amount from the listing text.
                match = re.search(r'([0-9,]+(?:\.[0-9]{2})?)', text)
                if match:
                    try:
                        val = float(match.group(1).replace(",", ""))
                    except ValueError:
                        continue
                    # Filter out outliers below $100 (usually accessories,
                    # manuals, or pure cooling fans).
                    if val > 100:
                        prices.append(val)
                # Stop after collecting 10 valid samples.
                if len(prices) >= 10:
                    break
            if not prices:
                return f"{self.fallback_prices[chip]} (Est)"
            median_val = statistics.median(prices)
            return f"${median_val:,.2f}"
        except Exception as e:
            print(str(e))
            return f"{self.fallback_prices[chip]} (Est)"

    def collect_data(self):
        """Fetch cloud and hardware prices for every tracked chip.

        Returns:
            list[dict]: one row per chip with Date, Chip, Cloud Rent (/hr),
            and Hardware Price keys, ready for DataFrame construction.
        """
        inventory = [
            {
                "Code": "B300",
                "Name": "Blackwell B300",
                "Query": "NVIDIA B300 GPU"
            },
            {
                "Code": "H100",
                "Name": "Hopper H100",
                "Query": "NVIDIA H100 PCIe 80GB"
            },
            {
                "Code": "V100",
                "Name": "Volta V100",
                "Query": "NVIDIA Tesla V100 16GB PCIe"
            },
        ]
        results = []
        current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        print(f"Fetching data [{current_time}]...")
        for item in inventory:
            rent = self.get_cloud_price(item["Code"])
            buy = self.get_hardware_price(item["Code"], item["Query"])
            results.append({
                "Date": current_time,
                "Chip": item["Name"],
                "Cloud Rent (/hr)": rent,
                "Hardware Price": buy
            })
            print(f" -> Fetched {item['Name']}")
        return results
def save_to_csv(new_data, filename="./gpu_price_history.csv"):
    """Append *new_data* rows to the CSV at *filename*, creating it if absent.

    Args:
        new_data: list of dicts (rows) as produced by collect_data().
        filename: target CSV path; history accumulates across runs.
    """
    new_df = pd.DataFrame(new_data)
    if os.path.exists(filename):
        try:
            existing_df = pd.read_csv(filename)
            combined_df = pd.concat([existing_df, new_df], ignore_index=True)
            combined_df.to_csv(filename, index=False)
            # Fix: the messages printed the literal "(unknown)" — restore the
            # filename placeholder in both f-strings.
            print(f"Successfully appended data to {filename}")
        except Exception as e:
            # Existing file unreadable/corrupt: overwrite with the new rows
            # rather than losing this run's data.
            print(f"Error writing to CSV: {e}")
            new_df.to_csv(filename, index=False)
    else:
        new_df.to_csv(filename, index=False)
        print(f"New file created at {filename}")
if __name__ == "__main__":
    # Entry point: scrape current prices and append them to the history CSV.
    tracker = RobustHardwareTracker()
    save_to_csv(tracker.collect_data())