import requests
from bs4 import BeautifulSoup
import pandas as pd
import random
import time
import os
import re
import statistics
from datetime import datetime


class RobustHardwareTracker:
    """Tracks NVIDIA datacenter GPU pricing from two public sources.

    - Cloud rental rates (per hour) scraped from getdeploying.com
    - Secondhand hardware prices (median of sold listings) scraped from eBay

    When scraping is blocked or yields nothing, preset fallback price
    ranges are reported instead, tagged "(Est)".
    """

    def __init__(self):
        # Preset fallback prices, used when the crawler is completely blocked
        self.fallback_prices = {
            "H100": "$28,500 - $32,000",
            "V100": "$350 - $650",
            "B300": "Contact Sales (Q4 2025)",
        }
        # Masquerade as real browser request headers (key fields like
        # Accept and Accept-Language included to look less bot-like).
        # One entry is picked at random per request.
        self.headers_list = [
            {
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
                "Accept-Language": "en-US,en;q=0.9",
                "Accept-Encoding": "gzip, deflate, br",
                "Connection": "keep-alive",
                "Upgrade-Insecure-Requests": "1",
            },
            {
                "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15",
                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
                "Accept-Language": "en-US,en;q=0.9",
                "Connection": "keep-alive",
            },
        ]

    def get_cloud_price(self, chip):
        """Return the lowest cloud rental price for *chip* as "$X.XX/hr".

        Returns "N/A" for unknown chips or HTTP failures, "Sold Out" when
        the page has no parseable prices, and "Check Provider" on any
        unexpected error (network timeout, parse crash, ...).
        """
        urls = {
            "B300": "https://getdeploying.com/reference/cloud-gpu/nvidia-dgx-b300",
            "H100": "https://getdeploying.com/reference/cloud-gpu/nvidia-h100",
            "V100": "https://getdeploying.com/reference/cloud-gpu/nvidia-v100",
        }
        try:
            url = urls.get(chip)
            if not url:
                return "N/A"
            h = random.choice(self.headers_list)
            resp = requests.get(url, headers=h, timeout=15)
            if resp.status_code != 200:
                return "N/A"
            soup = BeautifulSoup(resp.text, 'html.parser')
            # Parse the GetDeploying provider comparison table
            table = soup.find("table")
            if table:
                rows = table.find_all("tr")
                prices = []
                for row in rows:
                    txt = row.get_text()
                    if "$" in txt:
                        # Regex is compatible with $2.00, $2, and $1,000.00
                        match = re.search(r"\$([0-9,]+(?:\.[0-9]+)?)", txt)
                        if match:
                            clean_price = float(match.group(1).replace(",", ""))
                            prices.append(clean_price)
                if prices:
                    return f"${min(prices):.2f}/hr"
            # No table or no dollar amounts found: treat as unavailable.
            # (Guaranteed string return here — avoids an implicit None.)
            return "Sold Out"
        except Exception as e:
            print(str(e))
            return "Check Provider"

    def get_hardware_price(self, chip, search_query):
        """Return the median eBay sold price for *search_query* as "$X,XXX.XX".

        B300 short-circuits to its fallback text (not yet on the resale
        market). On scrape failure or zero valid samples, returns the
        preset fallback range tagged "(Est)".
        """
        if chip == "B300":
            return self.fallback_prices["B300"]
        url = "https://www.ebay.com/sch/i.html"
        # LH_Sold/LH_Complete restrict results to completed, sold listings
        params = {
            "_nkw": search_query,
            "LH_Sold": "1",
            "LH_Complete": "1",
            "rt": "nc",
        }
        try:
            # Add random delay to simulate human operation
            time.sleep(random.uniform(2.0, 4.0))
            h = random.choice(self.headers_list)
            resp = requests.get(url, params=params, headers=h, timeout=15)
            soup = BeautifulSoup(resp.text, 'html.parser')
            price_tags = soup.select(".s-item__price")
            prices = []
            for tag in price_tags:
                text = tag.get_text(strip=True)
                # Exclude the first hidden placeholder ("Shop on eBay")
                # that eBay injects into search results
                if "Shop on eBay" in text or not text:
                    continue
                # Handle a price range ("$X to $Y") by taking the low end
                if "to" in text:
                    text = text.split("to")[0]
                # Extract a valid amount from the remaining text
                match = re.search(r'([0-9,]+(?:\.[0-9]{2})?)', text)
                if match:
                    try:
                        val = float(match.group(1).replace(",", ""))
                        # Filter out outliers below $100 (usually
                        # accessories, manuals, or pure cooling fans)
                        if val > 100:
                            prices.append(val)
                    except ValueError:
                        continue
                # Stop after collecting 10 valid samples
                if len(prices) >= 10:
                    break
            if not prices:
                return f"{self.fallback_prices[chip]} (Est)"
            # Median is robust against the remaining outliers
            median_val = statistics.median(prices)
            return f"${median_val:,.2f}"
        except Exception as e:
            print(str(e))
            return f"{self.fallback_prices[chip]} (Est)"

    def collect_data(self):
        """Fetch rent + resale prices for every tracked chip.

        Returns a list of dicts ready for DataFrame construction, one per
        chip, each stamped with the collection timestamp.
        """
        inventory = [
            {"Code": "B300", "Name": "Blackwell B300", "Query": "NVIDIA B300 GPU"},
            {"Code": "H100", "Name": "Hopper H100", "Query": "NVIDIA H100 PCIe 80GB"},
            {"Code": "V100", "Name": "Volta V100", "Query": "NVIDIA Tesla V100 16GB PCIe"},
        ]
        results = []
        current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        print(f"Fetching data [{current_time}]...")
        for item in inventory:
            rent = self.get_cloud_price(item["Code"])
            buy = self.get_hardware_price(item["Code"], item["Query"])
            results.append({
                "Date": current_time,
                "Chip": item["Name"],
                "Cloud Rent (/hr)": rent,
                "Hardware Price": buy,
            })
            print(f" -> Fetched {item['Name']}")
        return results


def save_to_csv(new_data, filename="./gpu_price_history.csv"):
    """Append *new_data* rows to the CSV at *filename*, creating it if absent.

    On a read/merge failure of the existing file, falls back to writing
    only the new rows (NOTE: this overwrites the file and drops history —
    deliberate best-effort behavior so a corrupt file never blocks a run).
    """
    new_df = pd.DataFrame(new_data)
    if os.path.exists(filename):
        try:
            existing_df = pd.read_csv(filename)
            combined_df = pd.concat([existing_df, new_df], ignore_index=True)
            combined_df.to_csv(filename, index=False)
            # Fixed: report the actual target path (was a garbled literal)
            print(f"Successfully appended data to {filename}")
        except Exception as e:
            print(f"Error writing to CSV: {e}")
            new_df.to_csv(filename, index=False)
    else:
        new_df.to_csv(filename, index=False)
        # Fixed: report the actual target path (was a garbled literal)
        print(f"New file created at {filename}")


if __name__ == "__main__":
    tracker = RobustHardwareTracker()
    data = tracker.collect_data()
    save_to_csv(data)