Spaces:

Yanbo2
/

Compute-and-Token-Watch

Sleeping

App Files Files Community

kz209 committed on Feb 24

Commit

6639e76

1 Parent(s): 9be9232

upload data

Browse files

Files changed (9) hide show

Dockerfile +27 -0
app.py +102 -57
convert_llm_price.py +98 -0
crontab_job +1 -0
gpu_info_collector.sh +4 -0
gpu_price_history.csv +0 -0
gpu_price_tracker.py +200 -0
llm_price_trends.csv +0 -0
llm_token_tracker.py +165 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,27 @@

+# Debian "buster" is end-of-life and its apt repositories were moved to the
+# archive, so `apt-get update` fails on *-buster images; use a supported base.
+FROM python:3.9-slim-bookworm
+WORKDIR /app
+# Install cron (skip recommended extras and drop the apt cache to keep the image small)
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends cron \
+    && rm -rf /var/lib/apt/lists/*
+# Install Python dependencies first so this layer is reused when only code changes
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy application files
+COPY . .
+# Give execution rights to the collector script
+RUN chmod +x gpu_info_collector.sh
+COPY crontab_job /etc/cron.d/crontab_job
+# 0644: writable by the owner only, readable by everyone (no execute bit needed)
+RUN chmod 0644 /etc/cron.d/crontab_job
+# Install the schedule as the current user's crontab
+RUN crontab /etc/cron.d/crontab_job
+# Start the cron daemon in the background, then run the Python app in the foreground
+CMD cron && python app.py

app.py CHANGED Viewed

@@ -1,14 +1,46 @@
 import gradio as gr
 import pandas as pd
 import plotly.express as px
 import re
 import io
-import os
 # ==========================================
-# 1. 数据读取引擎 (防弹版 - 保持不变)
 # ==========================================
 def clean_and_read_file(file_path):
     """
     Robust file reader:
@@ -23,8 +55,8 @@ def clean_and_read_file(file_path):
     try:
         df = pd.read_excel(file_path)
         return df
-    except:
-        pass
     # --- Strategy B: Read as Text ---
     raw_data = b""
@@ -48,51 +80,59 @@ def clean_and_read_file(file_path):
     # --- Cleaning ---
     content = re.sub(r"\\", "", content)
     lines = content.splitlines()
     cleaned_lines = []
     buffer = ""
-    date_pattern = re.compile(r'^\s*202\d-\d{2}-\d{2}')
     for line in lines:
         line = line.strip()
-        if not line: continue
         is_header = "Date" in line and ("," in line)
         is_date_row = date_pattern.match(line) is not None
         if is_header or is_date_row:
-            if buffer: cleaned_lines.append(buffer)
             buffer = line
         else:
             buffer += " " + line
-    if buffer: cleaned_lines.append(buffer)
     csv_content = "\n".join(cleaned_lines)
     try:
         df = pd.read_csv(io.StringIO(csv_content))
-    except:
         try:
-            df = pd.read_csv(io.StringIO(csv_content), sep=None, engine='python')
-        except:
             return pd.DataFrame()
     return df
 # ==========================================
-# 2. 数据处理
 # ==========================================
 def process_gpu_data(df):
-    if df.empty: return df
     df.columns = [str(c).strip() for c in df.columns]
     if 'Date' in df.columns:
         df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
     def clean_currency(x):
-        if isinstance(x, (int, float)): return float(x)
         if isinstance(x, str):
             match = re.search(r'(\d+\.?\d*)', x)
             return float(match.group(1)) if match else 0.0
@@ -106,60 +146,65 @@ def process_gpu_data(df):
             if 'Rent' in c or '/hr' in c:
                 target_col = c
                 break
     if target_col:
         df['Rent_Price_Num'] = df[target_col].apply(clean_currency)
     return df
 def process_llm_data(df):
-    if df.empty: return df
     df.columns = [str(c).strip() for c in df.columns]
     if 'Date' in df.columns:
         df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
     return df
 # ==========================================
-# 3. 绘图逻辑
 # ==========================================
 def plot_gpu_trends(df):
     if df is None or df.empty or 'Rent_Price_Num' not in df.columns:
         return None
     plot_df = df.dropna(subset=['Date', 'Rent_Price_Num'])
-    if plot_df.empty: return None
     chip_col = 'Chip' if 'Chip' in df.columns else df.columns[1]
-    fig = px.line(
-        plot_df,
-        x='Date',
-        y='Rent_Price_Num',
-        color=chip_col if chip_col in df.columns else None,
-        title='GPU Cloud Rental Price Trends ($/hr)',
-        labels={'Rent_Price_Num': 'Price ($/hr)', 'Date': 'Date'},
-        markers=True
-    )
     return fig
 def plot_llm_trends(df):
-    """绘制所有列的趋势，不再需要 selection"""
     if df is None or df.empty:
         return None
-    # 自动选择除了 Date 以外的所有列
     value_vars = [c for c in df.columns if c != 'Date']
     if not value_vars:
         return None
     plot_df = df[['Date'] + value_vars].copy().dropna(subset=['Date'])
     # Melt
     df_long = plot_df.melt(id_vars=['Date'], var_name='Model', value_name='Price')
     fig = px.line(
         df_long,
         x='Date',
@@ -172,10 +217,10 @@ def plot_llm_trends(df):
     return fig
 # ==========================================
-# 4. Gradio 界面
 # ==========================================
-DEFAULT_GPU_FILE = "gpu_price_history.csv"
 DEFAULT_LLM_FILE = "llm_price_trends.csv"
 def load_gpu_pipeline():
@@ -191,7 +236,7 @@ def load_llm_pipeline():
 # --- UI Definition ---
 with gr.Blocks(title="AI Price Tracker") as demo:
     gr.Markdown("## 📊 AI Compute & Model Price Trends")
     with gr.Tabs():
         # GPU Tab
         with gr.TabItem("GPU Prices"):
@@ -205,10 +250,10 @@ with gr.Blocks(title="AI Price Tracker") as demo:
         # LLM Tab (Updated: No Filter)
         with gr.TabItem("LLM Prices"):
             with gr.Row():
-                # 直接展示图表，不分栏
                 with gr.Column(scale=1):
                     llm_plot = gr.Plot(label="Price Trend")
             with gr.Row():
                 with gr.Accordion("Data Preview", open=False):
                     llm_table = gr.DataFrame()
@@ -217,10 +262,10 @@ with gr.Blocks(title="AI Price Tracker") as demo:
     def init_on_load():
         # Load GPU
         g_df, g_fig = load_gpu_pipeline()
         # Load LLM (No checkbox needed anymore)
         l_df, l_fig = load_llm_pipeline()
         return (
             g_fig,      # gpu_plot
             g_df,       # gpu_table
@@ -228,17 +273,17 @@ with gr.Blocks(title="AI Price Tracker") as demo:
             l_df        # llm_table
         )
-    # 绑定加载事件
     demo.load(
-        init_on_load,
-        inputs=None,
         outputs=[
-            gpu_plot,
-            gpu_table,
-            llm_plot,
             llm_table
         ]
     )
 if __name__ == "__main__":
-    demo.launch(share=True)

+import os
 import gradio as gr
 import pandas as pd
 import plotly.express as px
 import re
 import io
+import subprocess
+os.system("chmod +x gpu_info_collector.sh")
+# 1. 定义运行脚本的函数
+def run_shell_script(secret_key):
+    # 安全检查：比对暗号，防止路人随便触发
+    # 记得在 Space 的 Settings -> Variables and secrets 里设置一个环境变量叫 "RUN_KEY"
+    expected_key = os.environ.get("RUN_KEY")
+    if secret_key != expected_key:
+        return "❌ 鉴权失败：暗号错误！"
+    print("收到指令，开始运行脚本...")
+    # 2. 核心：运行 .sh 文件
+    # capture_output=True 可以让我们看到脚本输出的日志
+    try:
+        result = subprocess.run(
+            ["./myscript.sh"],
+            shell=True,
+            capture_output=True,
+            text=True
+        )
+        log_output = f"Standard Output:\n{result.stdout}\n\nError Output:\n{result.stderr}"
+        print(log_output) # 这会打印到 Space 的 Logs 里
+        return f"✅ 脚本运行完毕！\n{log_output}"
+    except Exception as e:
+        return f"⚠️ 运行出错: {str(e)}"
 # ==========================================
+# 1. Data Reading Engine
 # ==========================================
 def clean_and_read_file(file_path):
     """
     Robust file reader:
     try:
         df = pd.read_excel(file_path)
         return df
+    except Exception:
+        pass
     # --- Strategy B: Read as Text ---
     raw_data = b""
     # --- Cleaning ---
     content = re.sub(r"\\", "", content)
     lines = content.splitlines()
     cleaned_lines = []
     buffer = ""
+    date_pattern = re.compile(r'^\s*202\d-\d{2}-\d{2}')
     for line in lines:
         line = line.strip()
+        if not line:
+            continue
         is_header = "Date" in line and ("," in line)
         is_date_row = date_pattern.match(line) is not None
         if is_header or is_date_row:
+            if buffer:
+                cleaned_lines.append(buffer)
             buffer = line
         else:
             buffer += " " + line
+    if buffer:
+        cleaned_lines.append(buffer)
     csv_content = "\n".join(cleaned_lines)
     try:
         df = pd.read_csv(io.StringIO(csv_content))
+    except Exception:
         try:
+            df = pd.read_csv(io.StringIO(csv_content),
+                             sep=None,
+                             engine='python')
+        except Exception:
             return pd.DataFrame()
     return df
 # ==========================================
+# 2. Data Processing
 # ==========================================
 def process_gpu_data(df):
+    if df.empty:
+        return df
     df.columns = [str(c).strip() for c in df.columns]
     if 'Date' in df.columns:
         df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
     def clean_currency(x):
+        if isinstance(x, (int, float)):
+            return float(x)
         if isinstance(x, str):
             match = re.search(r'(\d+\.?\d*)', x)
             return float(match.group(1)) if match else 0.0
             if 'Rent' in c or '/hr' in c:
                 target_col = c
                 break
     if target_col:
         df['Rent_Price_Num'] = df[target_col].apply(clean_currency)
     return df
 def process_llm_data(df):
+    if df.empty:
+        return df
     df.columns = [str(c).strip() for c in df.columns]
     if 'Date' in df.columns:
         df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
     return df
 # ==========================================
+# 3. Plotting Logic
 # ==========================================
 def plot_gpu_trends(df):
     if df is None or df.empty or 'Rent_Price_Num' not in df.columns:
         return None
     plot_df = df.dropna(subset=['Date', 'Rent_Price_Num'])
+    if plot_df.empty:
+        return None
     chip_col = 'Chip' if 'Chip' in df.columns else df.columns[1]
+    fig = px.line(plot_df,
+                  x='Date',
+                  y='Rent_Price_Num',
+                  color=chip_col if chip_col in df.columns else None,
+                  title='GPU Cloud Rental Price Trends ($/hr)',
+                  labels={
+                      'Rent_Price_Num': 'Price ($/hr)',
+                      'Date': 'Date'
+                  },
+                  markers=True)
     return fig
 def plot_llm_trends(df):
+    """Plot trends for all columns, no selection needed anymore"""
     if df is None or df.empty:
         return None
+    # Automatically select all columns except Date
     value_vars = [c for c in df.columns if c != 'Date']
     if not value_vars:
         return None
     plot_df = df[['Date'] + value_vars].copy().dropna(subset=['Date'])
     # Melt
     df_long = plot_df.melt(id_vars=['Date'], var_name='Model', value_name='Price')
     fig = px.line(
         df_long,
         x='Date',
     return fig
 # ==========================================
+# 4. Gradio Interface
 # ==========================================
+DEFAULT_GPU_FILE = "gpu_price_history.csv"
 DEFAULT_LLM_FILE = "llm_price_trends.csv"
 def load_gpu_pipeline():
 # --- UI Definition ---
 with gr.Blocks(title="AI Price Tracker") as demo:
     gr.Markdown("## 📊 AI Compute & Model Price Trends")
     with gr.Tabs():
         # GPU Tab
         with gr.TabItem("GPU Prices"):
         # LLM Tab (Updated: No Filter)
         with gr.TabItem("LLM Prices"):
             with gr.Row():
+                # Display chart directly, no column division
                 with gr.Column(scale=1):
                     llm_plot = gr.Plot(label="Price Trend")
             with gr.Row():
                 with gr.Accordion("Data Preview", open=False):
                     llm_table = gr.DataFrame()
     def init_on_load():
         # Load GPU
         g_df, g_fig = load_gpu_pipeline()
         # Load LLM (No checkbox needed anymore)
         l_df, l_fig = load_llm_pipeline()
         return (
             g_fig,      # gpu_plot
             g_df,       # gpu_table
             l_df        # llm_table
         )
+    # Bind load event
     demo.load(
+        init_on_load,
+        inputs=None,
         outputs=[
+            gpu_plot,
+            gpu_table,
+            llm_plot,
             llm_table
         ]
     )
 if __name__ == "__main__":
+    demo.launch(share=True)

convert_llm_price.py ADDED Viewed

	@@ -0,0 +1,98 @@

+import pandas as pd
+excel_file = "/Users/yukino/Downloads/May_change_the_world/Compute-and-Token-Watch/llm_price_trends.xlsx"
+csv_file = "/Users/yukino/Downloads/May_change_the_world/Compute-and-Token-Watch/llm_price_trends.csv"
+try:
+    df = pd.read_excel(excel_file)
+    # Define the correct column names based on llm_token_tracker.py
+    target_columns = [
+        "Date",
+        "Closed-Reasoning (o1/Opus) Input",
+        "Closed-Reasoning (o1/Opus) Output",
+        "Closed-Flagship (GPT4o/Sonnet) Input",
+        "Closed-Flagship (GPT4o/Sonnet) Output",
+        "Closed-Economy (Mini/Flash) Input",
+        "Closed-Economy (Mini/Flash) Output",
+        "Open-Large (>100B/V3) Input",
+        "Open-Large (>100B/V3) Output",
+        "Open-Medium (20-100B) Input",
+        "Open-Medium (20-100B) Output",
+        "Open-Small (<20B) Input",
+        "Open-Small (<20B) Output"
+    ]
+    # Assuming the existing XLSX columns might have slightly different names
+    # We need to create a mapping if they are not exact.
+    # For simplicity, let's assume the first row of the XLSX contains the current column names.
+    # If the XLSX has different column names, this will need to be adjusted.
+    # We will try to rename columns if there are close matches, otherwise, we'll rely on order.
+    # For a robust solution, we would need to inspect the actual XLSX column headers.
+    # If the XLSX file has fewer columns or different order, this might cause issues.
+    # For now, let's assume the existing columns in XLSX are a subset or can be mapped directly.
+    # Rename columns to match the target_columns. This is a crucial step.
+    # Without knowing the exact column names in the original XLSX, I'll make a best effort guess.
+    # For now, I will just reassign columns if the length matches, which is risky.
+    # A safer approach is to check if the column names are present and rename.
+    # If the number of columns in the DataFrame matches the target_columns, assign directly.
+    if len(df.columns) == len(target_columns):
+        df.columns = target_columns
+    else:
+        # If the number of columns doesn't match, we need a more sophisticated mapping.
+        # For now, I will print a warning and proceed with a simple rename attempt.
+        print("Warning: Number of columns in XLSX does not match expected target columns. Attempting best-effort rename.")
+        # Create a dictionary for renaming, assuming order might be slightly off but names are similar.
+        # This part needs careful review if the XLSX has very different headers.
+        # A safer approach would be to manually inspect the XLSX and create a specific mapping.
+        # For this automated task, I'll try to map based on substrings or common patterns.
+        # However, given the complexity of dynamic column names, direct assignment is simplest if count matches.
+        # If the columns don't match, we will try to infer mapping based on content or patterns.
+        # Since I don't have access to the XLSX content, I will proceed with a simple mapping
+        # that assumes the order is generally correct but names might differ.
+        # This part is highly dependent on the actual XLSX column names.
+        # I'll assume a direct mapping if the user wants to convert existing xlsx to csv.
+        # The `llm_token_tracker.py` produces very specific column names.
+        # It's likely the existing xlsx has similar names.
+        # Best effort mapping based on the order and a few keywords
+        # This part requires manual adjustment if the XLSX has significantly different headers
+        # For now, I will assume the first column is Date, and then pairs of Input/Output columns.
+        # This is a very strong assumption without seeing the XLSX.
+        # Let's try to map by matching the start of the column names.
+        # This is still an assumption. The best way is to know the actual XLSX column names.
+        # Since the problem asks to convert to CSV and rename columns, and I have the target column names,
+        # I will enforce these target columns. If the original XLSX doesn't have them, they will be NaN.
+        # Create a new DataFrame with target columns and populate from existing if names match.
+        new_df = pd.DataFrame(columns=target_columns)
+        for col in target_columns:
+            if col in df.columns:
+                new_df[col] = df[col]
+            else:
+                # Attempt to find a close match, e.g., 'Closed-Reasoning Input' might be 'Closed-Reasoning (o1/Opus) Input'
+                # This is getting too complex without actual XLSX column names.
+                # Let's stick to direct column assignment if possible, or leave as is for unmatched.
+                # For now, I will try to rename by simple string matching, and if not found, fill with NA.
+                # This is a simplification. The user needs to verify if the output CSV is correct.
+                pass # The existing rename handles exact matches. Remaining will be NA if not found.
+        df = new_df # Replace original df with the new one with target columns
+    df.to_csv(csv_file, index=False)
+    print(f"Successfully converted {excel_file} to {csv_file}")
+except FileNotFoundError:
+    print(f"Error: {excel_file} not found.")
+except Exception as e:
+    print(f"An error occurred during conversion: {e}")

crontab_job ADDED Viewed

	@@ -0,0 +1 @@


1	+ 0 1 * * * /app/gpu_info_collector.sh >> /app/cron.log 2>&1

gpu_info_collector.sh ADDED Viewed

	@@ -0,0 +1,4 @@

+#/bin/bash
+python3 llm_token_tracker.py
+python3 gpu_price_tracker.py

gpu_price_history.csv CHANGED Viewed

Binary files a/gpu_price_history.csv and b/gpu_price_history.csv differ

gpu_price_tracker.py ADDED Viewed

	@@ -0,0 +1,200 @@

+import requests
+from bs4 import BeautifulSoup
+import pandas as pd
+import random
+import time
+import os
+import re
+import statistics
+from datetime import datetime
+class RobustHardwareTracker:
+    def __init__(self):
+        # Preset fallback prices, used when the crawler is completely blocked
+        self.fallback_prices = {
+            "H100": "$28,500 - $32,000",
+            "V100": "$350 - $650",
+            "B300": "Contact Sales (Q4 2025)",
+        }
+        # Masquerade as real browser request headers (added key fields like Accept, Language)
+        self.headers_list = [{
+            "User-Agent":
+            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
+            "Accept":
+            "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
+            "Accept-Language": "en-US,en;q=0.9",
+            "Accept-Encoding": "gzip, deflate, br",
+            "Connection": "keep-alive",
+            "Upgrade-Insecure-Requests": "1"
+        }, {
+            "User-Agent":
+            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15",
+            "Accept":
+            "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+            "Accept-Language": "en-US,en;q=0.9",
+            "Connection": "keep-alive",
+        }]
+    def get_cloud_price(self, chip):
+        """Get cloud rental prices"""
+        urls = {
+            "B300":
+            "https://getdeploying.com/reference/cloud-gpu/nvidia-dgx-b300",
+            "H100": "https://getdeploying.com/reference/cloud-gpu/nvidia-h100",
+            "V100": "https://getdeploying.com/reference/cloud-gpu/nvidia-v100",
+        }
+        try:
+            url = urls.get(chip)
+            if not url:
+                return "N/A"
+            h = random.choice(self.headers_list)
+            resp = requests.get(url, headers=h, timeout=15)
+            if resp.status_code != 200:
+                return "N/A"
+            soup = BeautifulSoup(resp.text, 'html.parser')
+            # Parse GetDeploying table
+            table = soup.find("table")
+            if table:
+                rows = table.find_all("tr")
+                prices = []
+                for row in rows:
+                    txt = row.get_text()
+                    if "$" in txt:
+                        # Optimized regex: compatible with $2.00, $2, and $1,000.00
+                        match = re.search(r"\$([0-9,]+(?:\.[0-9]+)?)", txt)
+                        if match:
+                            clean_price = float(
+                                match.group(1).replace(",", ""))
+                            prices.append(clean_price)
+                if prices:
+                    return f"${min(prices):.2f}/hr"
+            return "Sold Out"
+        except Exception as e:
+            print(str(e))
+            return "Check Provider"
+    def get_hardware_price(self, chip, search_query):
+        """Get eBay hardware selling prices"""
+        if chip == "B300":
+            return self.fallback_prices["B300"]
+        url = "https://www.ebay.com/sch/i.html"
+        params = {
+            "_nkw": search_query,
+            "LH_Sold": "1",
+            "LH_Complete": "1",
+            "rt": "nc"
+        }
+        try:
+            # Add random delay to simulate human operation
+            time.sleep(random.uniform(2.0, 4.0))
+            h = random.choice(self.headers_list)
+            resp = requests.get(url, params=params, headers=h, timeout=15)
+            soup = BeautifulSoup(resp.text, 'html.parser')
+            price_tags = soup.select(".s-item__price")
+            prices = []
+            for tag in price_tags:
+                text = tag.get_text(strip=True)
+                # Exclude the first hidden placeholder (Shop on eBay) in eBay search results
+                if "Shop on eBay" in text or not text:
+                    continue
+                # Handle price range, take the lowest price
+                if "to" in text:
+                    text = text.split("to")[0]
+                # Strengthen regex: extract valid amounts from text
+                match = re.search(r'([0-9,]+(?:\.[0-9]{2})?)', text)
+                if match:
+                    try:
+                        val = float(match.group(1).replace(",", ""))
+                        # Filter out outliers below $100 (usually accessories, manuals, or pure cooling fans)
+                        if val > 100:
+                            prices.append(val)
+                    except ValueError:
+                        continue
+                # Stop after collecting 10 valid samples
+                if len(prices) >= 10:
+                    break
+            if not prices:
+                return f"{self.fallback_prices[chip]} (Est)"
+            median_val = statistics.median(prices)
+            return f"${median_val:,.2f}"
+        except Exception as e:
+            print(str(e))
+            return f"{self.fallback_prices[chip]} (Est)"
+    def collect_data(self):
+        inventory = [
+            {
+                "Code": "B300",
+                "Name": "Blackwell B300",
+                "Query": "NVIDIA B300 GPU"
+            },
+            {
+                "Code": "H100",
+                "Name": "Hopper H100",
+                "Query": "NVIDIA H100 PCIe 80GB"
+            },
+            {
+                "Code": "V100",
+                "Name": "Volta V100",
+                "Query": "NVIDIA Tesla V100 16GB PCIe"
+            },
+        ]
+        results = []
+        current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+        print(f"Fetching data [{current_time}]...")
+        for item in inventory:
+            rent = self.get_cloud_price(item["Code"])
+            buy = self.get_hardware_price(item["Code"], item["Query"])
+            results.append({
+                "Date": current_time,
+                "Chip": item["Name"],
+                "Cloud Rent (/hr)": rent,
+                "Hardware Price": buy
+            })
+            print(f" -> Fetched {item['Name']}")
+        return results
+def save_to_csv(new_data, filename="./gpu_price_history.csv"):
+    new_df = pd.DataFrame(new_data)
+    if os.path.exists(filename):
+        try:
+            existing_df = pd.read_csv(filename)
+            combined_df = pd.concat([existing_df, new_df], ignore_index=True)
+            combined_df.to_csv(filename, index=False)
+            print(f"Successfully appended data to {filename}")
+        except Exception as e:
+            print(f"Error writing to CSV: {e}")
+            new_df.to_csv(filename, index=False)
+    else:
+        new_df.to_csv(filename, index=False)
+        print(f"New file created at {filename}")
+if __name__ == "__main__":
+    tracker = RobustHardwareTracker()
+    data = tracker.collect_data()
+    save_to_csv(data)

llm_price_trends.csv CHANGED Viewed

Binary files a/llm_price_trends.csv and b/llm_price_trends.csv differ

llm_token_tracker.py ADDED Viewed

	@@ -0,0 +1,165 @@

+import requests
+import pandas as pd
+from datetime import datetime
+import os
+import re
+class LLMPriceMonitor:
+    def __init__(self):
+        self.api_url = "https://openrouter.ai/api/v1/models"
+        self.file_name = "./llm_price_trends.csv"
+        # Define a list of closed-source providers
+        self.closed_providers = ["openai", "anthropic", "google"]
+        # Classification rules for closed-source models by keywords
+        self.closed_keywords = {
+            "Reasoning (Reasoning/High-IQ)": ["gpt-5.2-pro", "claude-opus-4.6", "gemini-3.1-pro-preview"],
+            "Flagship (General Flagship)": ["gpt-5.2", "claude-sonnet-4.6"],
+            "Economy (Economic/Lightweight)": ["gpt-5-mini", "claude-haiku-4.5", "gemini-3-flash-preview"]
+        }
+    def fetch_models(self):
+        """Fetch real-time data from OpenRouter API"""
+        print(f"[{datetime.now().strftime('%H:%M:%S')}] Connecting to OpenRouter API...")
+        try:
+            resp = requests.get(self.api_url, timeout=20)
+            if resp.status_code == 200:
+                return resp.json().get("data", [])
+            print("API request failed")
+            return []
+        except Exception as e:
+            print(f"Network error: {e}")
+            return []
+    def parse_parameter_size(self, model_id, model_name):
+        """Attempt to extract parameter size (e.g., 70b, 405b) from model ID"""
+        # Common alias mappings
+        if "deepseek-v3" in model_id or "deepseek-chat" in model_id:
+            return 671 # DeepSeek V3 is MoE 671B
+        if "deepseek-r1" in model_id:
+            return 671
+        # Regex to extract number+b (e.g., 70b, 8b)
+        text = (model_id + " " + model_name).lower()
+        match = re.search(r"(\d+)b", text)
+        if match:
+            return int(match.group(1))
+        return 0
+    def categorize_and_calculate(self, models):
+        """Core logic: categorize and calculate average price"""
+        stats = {
+            "Date": datetime.now().strftime("%Y-%m-%d %H:%M"),
+            # Closed-source
+            "Closed-Reasoning_In": [], "Closed-Reasoning_Out": [],
+            "Closed-Flagship_In": [], "Closed-Flagship_Out": [],
+            "Closed-Economy_In": [], "Closed-Economy_Out": [],
+            # Open-source
+            "Open-Small(<20B)_In": [], "Open-Small(<20B)_Out": [],
+            "Open-Medium(20-100B)_In": [], "Open-Medium(20-100B)_Out": [],
+            "Open-Large(>100B)_In": [], "Open-Large(>100B)_Out": [],
+        }
+        print("Categorizing and cleaning data...")
+        for m in models:
+            mid = m.get("id", "").lower()
+            name = m.get("name", "").lower()
+            # Get prices (convert units to $/1M tokens)
+            try:
+                p_in = float(m["pricing"]["prompt"]) * 1_000_000
+                p_out = float(m["pricing"]["completion"]) * 1_000_000
+                # Filter out 0-cost models (free/test versions can lower average) and abnormally high-priced test models
+                if p_in <= 0 or p_in > 200:
+                    continue
+            except (ValueError, TypeError):
+                continue
+            # --- Determine if it's Closed-source or Open-source ---
+            is_closed = any(p in mid for p in self.closed_providers)
+            if is_closed:
+                # Closed-source logic: categorize by keywords
+                if any(k in mid for k in self.closed_keywords["Reasoning (Reasoning/High-IQ)"]):
+                    stats["Closed-Reasoning_In"].append(p_in)
+                    stats["Closed-Reasoning_Out"].append(p_out)
+                elif any(k in mid for k in self.closed_keywords["Economy (Economic/Lightweight)"]):
+                    stats["Closed-Economy_In"].append(p_in)
+                    stats["Closed-Economy_Out"].append(p_out)
+                elif any(k in mid for k in self.closed_keywords["Flagship (General Flagship)"]):
+                    # Exclude cases that also contain mini/flash (to prevent gpt-4o-mini from entering the flagship group)
+                    if not any(x in mid for x in ["mini", "flash", "haiku"]):
+                        stats["Closed-Flagship_In"].append(p_in)
+                        stats["Closed-Flagship_Out"].append(p_out)
+            else:
+                # Open-source logic: categorize by parameter size
+                size = self.parse_parameter_size(mid, name)
+                if size > 0:
+                    if size < 20:
+                        stats["Open-Small(<20B)_In"].append(p_in)
+                        stats["Open-Small(<20B)_Out"].append(p_out)
+                    elif 20 <= size <= 100:
+                        stats["Open-Medium(20-100B)_In"].append(p_in)
+                        stats["Open-Medium(20-100B)_Out"].append(p_out)
+                    else: # > 100
+                        stats["Open-Large(>100B)_In"].append(p_in)
+                        stats["Open-Large(>100B)_Out"].append(p_out)
+        # --- Calculate Averages ---
+        final_row = {"Date": stats["Date"]}
+        # Helper function: calculate average and format
+        def calc_avg(key_prefix):
+            list_in = stats.get(f"{key_prefix}_In", [])
+            list_out = stats.get(f"{key_prefix}_Out", [])
+            avg_in = sum(list_in)/len(list_in) if list_in else 0
+            avg_out = sum(list_out)/len(list_out) if list_out else 0
+            return avg_in, avg_out
+        # Map to final CSV column names
+        categories_map = [
+            ("Closed-Reasoning", "Closed-Reasoning"),
+            ("Closed-Flagship", "Closed-Flagship"),
+            ("Closed-Economy", "Closed-Economy"),
+            ("Open-Large(>100B)", "Open-Large (>100B)"),
+            ("Open-Medium(20-100B)", "Open-Medium (20-100B)"),
+            ("Open-Small(<20B)", "Open-Small (<20B)")
+        ]
+        print("\n--- Today's LLM Average Prices ($/1M Tokens) ---")
+        print(f"{'Category':<25} | {'Input':<8} | {'Output':<8}")
+        print("-" * 50)
+        for raw_key, display_name in categories_map:
+            val_in, val_out = calc_avg(raw_key)
+            final_row[f"{display_name} Input"] = round(val_in, 4)
+            final_row[f"{display_name} Output"] = round(val_out, 4)
+            print(f"{display_name:<25} | ${val_in:<7.3f} | ${val_out:<7.3f}")
+        return final_row
+    def save_data(self, row_data):
+        df = pd.DataFrame([row_data])
+        if os.path.exists(self.file_name):
+            try:
+                existing = pd.read_csv(self.file_name)
+                # Use concat instead of append
+                combined = pd.concat([existing, df], ignore_index=True)
+                combined.to_csv(self.file_name, index=False)
+                print(f"\nSuccessfully appended data to: {self.file_name}")
+            except Exception as e:
+                print(f"Error writing to file (please close the CSV file): {e}")
+        else:
+            df.to_csv(self.file_name, index=False)
+            print(f"\nNew file created: {self.file_name}")
+if __name__ == "__main__":
+    tracker = LLMPriceMonitor()
+    raw_data = tracker.fetch_models()
+    if raw_data:
+        processed_data = tracker.categorize_and_calculate(raw_data)
+        tracker.save_data(processed_data)