Spaces:
Sleeping
Sleeping
kz209 committed on
Commit ·
6639e76
1
Parent(s): 9be9232
upload data
Browse files- Dockerfile +27 -0
- app.py +102 -57
- convert_llm_price.py +98 -0
- crontab_job +1 -0
- gpu_info_collector.sh +4 -0
- gpu_price_history.csv +0 -0
- gpu_price_tracker.py +200 -0
- llm_price_trends.csv +0 -0
- llm_token_tracker.py +165 -0
Dockerfile
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
# Image for the AI price tracker: runs the Gradio app (app.py) in the
# foreground and a cron daemon that periodically refreshes the price CSVs.
#
# Debian buster is end-of-life and its apt repositories have been archived,
# so `apt-get update` is unreliable on the buster tag; use a supported release.
FROM python:3.9-slim-bookworm

WORKDIR /app

# Gradio listens on 127.0.0.1 unless told otherwise, which is unreachable
# from outside the container.
ENV GRADIO_SERVER_NAME=0.0.0.0
EXPOSE 7860

# Install cron; drop the apt lists afterwards to keep the layer small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends cron \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies first so this layer is cached across code edits.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application files (includes crontab_job and gpu_info_collector.sh).
COPY . .

# Give execution rights to the collector script run by cron.
RUN chmod +x gpu_info_collector.sh

# Install the schedule as root's user crontab. The entry has no user field,
# so it is NOT a valid /etc/cron.d file and must not be copied there.
RUN crontab crontab_job

# Start cron in the background and run the Python app in the foreground.
CMD cron && python app.py
|
app.py
CHANGED
|
@@ -1,14 +1,46 @@
|
|
|
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import pandas as pd
|
| 3 |
import plotly.express as px
|
| 4 |
import re
|
| 5 |
import io
|
| 6 |
-
import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
# ==========================================
|
| 9 |
-
# 1.
|
| 10 |
# ==========================================
|
| 11 |
|
|
|
|
| 12 |
def clean_and_read_file(file_path):
|
| 13 |
"""
|
| 14 |
Robust file reader:
|
|
@@ -23,8 +55,8 @@ def clean_and_read_file(file_path):
|
|
| 23 |
try:
|
| 24 |
df = pd.read_excel(file_path)
|
| 25 |
return df
|
| 26 |
-
except:
|
| 27 |
-
pass
|
| 28 |
|
| 29 |
# --- Strategy B: Read as Text ---
|
| 30 |
raw_data = b""
|
|
@@ -48,51 +80,59 @@ def clean_and_read_file(file_path):
|
|
| 48 |
|
| 49 |
# --- Cleaning ---
|
| 50 |
content = re.sub(r"\\", "", content)
|
| 51 |
-
|
| 52 |
lines = content.splitlines()
|
| 53 |
cleaned_lines = []
|
| 54 |
buffer = ""
|
| 55 |
-
date_pattern = re.compile(r'^\s*202\d-\d{2}-\d{2}')
|
| 56 |
-
|
| 57 |
for line in lines:
|
| 58 |
line = line.strip()
|
| 59 |
-
if not line:
|
| 60 |
-
|
|
|
|
| 61 |
is_header = "Date" in line and ("," in line)
|
| 62 |
is_date_row = date_pattern.match(line) is not None
|
| 63 |
-
|
| 64 |
if is_header or is_date_row:
|
| 65 |
-
if buffer:
|
|
|
|
| 66 |
buffer = line
|
| 67 |
else:
|
| 68 |
buffer += " " + line
|
| 69 |
-
|
| 70 |
-
if buffer:
|
| 71 |
-
|
|
|
|
| 72 |
csv_content = "\n".join(cleaned_lines)
|
| 73 |
try:
|
| 74 |
df = pd.read_csv(io.StringIO(csv_content))
|
| 75 |
-
except:
|
| 76 |
try:
|
| 77 |
-
df = pd.read_csv(io.StringIO(csv_content),
|
| 78 |
-
|
|
|
|
|
|
|
| 79 |
return pd.DataFrame()
|
| 80 |
|
| 81 |
return df
|
| 82 |
|
| 83 |
# ==========================================
|
| 84 |
-
# 2.
|
| 85 |
# ==========================================
|
| 86 |
|
|
|
|
| 87 |
def process_gpu_data(df):
|
| 88 |
-
if df.empty:
|
|
|
|
| 89 |
df.columns = [str(c).strip() for c in df.columns]
|
| 90 |
|
| 91 |
if 'Date' in df.columns:
|
| 92 |
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
|
| 93 |
-
|
| 94 |
def clean_currency(x):
|
| 95 |
-
if isinstance(x, (int, float)):
|
|
|
|
| 96 |
if isinstance(x, str):
|
| 97 |
match = re.search(r'(\d+\.?\d*)', x)
|
| 98 |
return float(match.group(1)) if match else 0.0
|
|
@@ -106,60 +146,65 @@ def process_gpu_data(df):
|
|
| 106 |
if 'Rent' in c or '/hr' in c:
|
| 107 |
target_col = c
|
| 108 |
break
|
| 109 |
-
|
| 110 |
if target_col:
|
| 111 |
df['Rent_Price_Num'] = df[target_col].apply(clean_currency)
|
| 112 |
-
|
| 113 |
return df
|
| 114 |
|
|
|
|
| 115 |
def process_llm_data(df):
|
| 116 |
-
if df.empty:
|
|
|
|
| 117 |
df.columns = [str(c).strip() for c in df.columns]
|
| 118 |
-
|
| 119 |
if 'Date' in df.columns:
|
| 120 |
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
|
| 121 |
-
|
| 122 |
return df
|
| 123 |
|
| 124 |
# ==========================================
|
| 125 |
-
# 3.
|
| 126 |
# ==========================================
|
| 127 |
|
|
|
|
| 128 |
def plot_gpu_trends(df):
|
| 129 |
if df is None or df.empty or 'Rent_Price_Num' not in df.columns:
|
| 130 |
return None
|
| 131 |
-
|
| 132 |
plot_df = df.dropna(subset=['Date', 'Rent_Price_Num'])
|
| 133 |
-
if plot_df.empty:
|
|
|
|
| 134 |
|
| 135 |
chip_col = 'Chip' if 'Chip' in df.columns else df.columns[1]
|
| 136 |
|
| 137 |
-
fig = px.line(
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
|
|
|
| 146 |
return fig
|
| 147 |
|
| 148 |
def plot_llm_trends(df):
|
| 149 |
-
"""
|
| 150 |
if df is None or df.empty:
|
| 151 |
return None
|
| 152 |
-
|
| 153 |
-
#
|
| 154 |
value_vars = [c for c in df.columns if c != 'Date']
|
| 155 |
if not value_vars:
|
| 156 |
return None
|
| 157 |
|
| 158 |
plot_df = df[['Date'] + value_vars].copy().dropna(subset=['Date'])
|
| 159 |
-
|
| 160 |
# Melt
|
| 161 |
df_long = plot_df.melt(id_vars=['Date'], var_name='Model', value_name='Price')
|
| 162 |
-
|
| 163 |
fig = px.line(
|
| 164 |
df_long,
|
| 165 |
x='Date',
|
|
@@ -172,10 +217,10 @@ def plot_llm_trends(df):
|
|
| 172 |
return fig
|
| 173 |
|
| 174 |
# ==========================================
|
| 175 |
-
# 4. Gradio
|
| 176 |
# ==========================================
|
| 177 |
|
| 178 |
-
DEFAULT_GPU_FILE = "gpu_price_history.csv"
|
| 179 |
DEFAULT_LLM_FILE = "llm_price_trends.csv"
|
| 180 |
|
| 181 |
def load_gpu_pipeline():
|
|
@@ -191,7 +236,7 @@ def load_llm_pipeline():
|
|
| 191 |
# --- UI Definition ---
|
| 192 |
with gr.Blocks(title="AI Price Tracker") as demo:
|
| 193 |
gr.Markdown("## 📊 AI Compute & Model Price Trends")
|
| 194 |
-
|
| 195 |
with gr.Tabs():
|
| 196 |
# GPU Tab
|
| 197 |
with gr.TabItem("GPU Prices"):
|
|
@@ -205,10 +250,10 @@ with gr.Blocks(title="AI Price Tracker") as demo:
|
|
| 205 |
# LLM Tab (Updated: No Filter)
|
| 206 |
with gr.TabItem("LLM Prices"):
|
| 207 |
with gr.Row():
|
| 208 |
-
#
|
| 209 |
with gr.Column(scale=1):
|
| 210 |
llm_plot = gr.Plot(label="Price Trend")
|
| 211 |
-
|
| 212 |
with gr.Row():
|
| 213 |
with gr.Accordion("Data Preview", open=False):
|
| 214 |
llm_table = gr.DataFrame()
|
|
@@ -217,10 +262,10 @@ with gr.Blocks(title="AI Price Tracker") as demo:
|
|
| 217 |
def init_on_load():
|
| 218 |
# Load GPU
|
| 219 |
g_df, g_fig = load_gpu_pipeline()
|
| 220 |
-
|
| 221 |
# Load LLM (No checkbox needed anymore)
|
| 222 |
l_df, l_fig = load_llm_pipeline()
|
| 223 |
-
|
| 224 |
return (
|
| 225 |
g_fig, # gpu_plot
|
| 226 |
g_df, # gpu_table
|
|
@@ -228,17 +273,17 @@ with gr.Blocks(title="AI Price Tracker") as demo:
|
|
| 228 |
l_df # llm_table
|
| 229 |
)
|
| 230 |
|
| 231 |
-
#
|
| 232 |
demo.load(
|
| 233 |
-
init_on_load,
|
| 234 |
-
inputs=None,
|
| 235 |
outputs=[
|
| 236 |
-
gpu_plot,
|
| 237 |
-
gpu_table,
|
| 238 |
-
llm_plot,
|
| 239 |
llm_table
|
| 240 |
]
|
| 241 |
)
|
| 242 |
|
| 243 |
if __name__ == "__main__":
|
| 244 |
-
demo.launch(share=True)
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
|
| 3 |
import gradio as gr
|
| 4 |
import pandas as pd
|
| 5 |
import plotly.express as px
|
| 6 |
import re
|
| 7 |
import io
|
| 8 |
+
import subprocess
|
| 9 |
+
|
| 10 |
+
os.system("chmod +x gpu_info_collector.sh")
|
| 11 |
+
|
| 12 |
+
# 1. Define the function that runs the collection script
|
| 13 |
+
def run_shell_script(secret_key):
    """Run the data-collection shell script after verifying a shared secret.

    Args:
        secret_key: Secret supplied by the caller. It must equal the value of
            the ``RUN_KEY`` environment variable (configure it under the
            Space's Settings -> Variables and secrets).

    Returns:
        A human-readable status string: an authentication-failure message,
        the captured stdout/stderr of the script, or an error description if
        the script could not be started.
    """
    import hmac  # local import: the file's import block is left untouched

    expected_key = os.environ.get("RUN_KEY")

    # Fail closed. If RUN_KEY is not configured, `None != None` used to be
    # False, so a caller sending no key at all was authenticated.
    if not expected_key or not isinstance(secret_key, str):
        return "❌ 鉴权失败:暗号错误!"

    # Constant-time comparison so the secret cannot be probed via timing.
    if not hmac.compare_digest(secret_key.encode("utf-8"),
                               expected_key.encode("utf-8")):
        return "❌ 鉴权失败:暗号错误!"

    print("收到指令,开始运行脚本...")

    # Run the collector that this module marks executable at import time.
    # An argument list without shell=True avoids shell parsing entirely, and
    # naming the interpreter avoids relying on the script's shebang line.
    # capture_output=True lets us return the script's own log output.
    try:
        result = subprocess.run(
            ["bash", "./gpu_info_collector.sh"],
            capture_output=True,
            text=True,
        )
        log_output = f"Standard Output:\n{result.stdout}\n\nError Output:\n{result.stderr}"
        print(log_output)  # also shows up in the Space's Logs
        return f"✅ 脚本运行完毕!\n{log_output}"
    except Exception as e:
        return f"⚠️ 运行出错: {str(e)}"
|
| 37 |
+
|
| 38 |
|
| 39 |
# ==========================================
|
| 40 |
+
# 1. Data Reading Engine
|
| 41 |
# ==========================================
|
| 42 |
|
| 43 |
+
|
| 44 |
def clean_and_read_file(file_path):
|
| 45 |
"""
|
| 46 |
Robust file reader:
|
|
|
|
| 55 |
try:
|
| 56 |
df = pd.read_excel(file_path)
|
| 57 |
return df
|
| 58 |
+
except Exception:
|
| 59 |
+
pass
|
| 60 |
|
| 61 |
# --- Strategy B: Read as Text ---
|
| 62 |
raw_data = b""
|
|
|
|
| 80 |
|
| 81 |
# --- Cleaning ---
|
| 82 |
content = re.sub(r"\\", "", content)
|
| 83 |
+
|
| 84 |
lines = content.splitlines()
|
| 85 |
cleaned_lines = []
|
| 86 |
buffer = ""
|
| 87 |
+
date_pattern = re.compile(r'^\s*202\d-\d{2}-\d{2}')
|
| 88 |
+
|
| 89 |
for line in lines:
|
| 90 |
line = line.strip()
|
| 91 |
+
if not line:
|
| 92 |
+
continue
|
| 93 |
+
|
| 94 |
is_header = "Date" in line and ("," in line)
|
| 95 |
is_date_row = date_pattern.match(line) is not None
|
| 96 |
+
|
| 97 |
if is_header or is_date_row:
|
| 98 |
+
if buffer:
|
| 99 |
+
cleaned_lines.append(buffer)
|
| 100 |
buffer = line
|
| 101 |
else:
|
| 102 |
buffer += " " + line
|
| 103 |
+
|
| 104 |
+
if buffer:
|
| 105 |
+
cleaned_lines.append(buffer)
|
| 106 |
+
|
| 107 |
csv_content = "\n".join(cleaned_lines)
|
| 108 |
try:
|
| 109 |
df = pd.read_csv(io.StringIO(csv_content))
|
| 110 |
+
except Exception:
|
| 111 |
try:
|
| 112 |
+
df = pd.read_csv(io.StringIO(csv_content),
|
| 113 |
+
sep=None,
|
| 114 |
+
engine='python')
|
| 115 |
+
except Exception:
|
| 116 |
return pd.DataFrame()
|
| 117 |
|
| 118 |
return df
|
| 119 |
|
| 120 |
# ==========================================
|
| 121 |
+
# 2. Data Processing
|
| 122 |
# ==========================================
|
| 123 |
|
| 124 |
+
|
| 125 |
def process_gpu_data(df):
|
| 126 |
+
if df.empty:
|
| 127 |
+
return df
|
| 128 |
df.columns = [str(c).strip() for c in df.columns]
|
| 129 |
|
| 130 |
if 'Date' in df.columns:
|
| 131 |
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
|
| 132 |
+
|
| 133 |
def clean_currency(x):
|
| 134 |
+
if isinstance(x, (int, float)):
|
| 135 |
+
return float(x)
|
| 136 |
if isinstance(x, str):
|
| 137 |
match = re.search(r'(\d+\.?\d*)', x)
|
| 138 |
return float(match.group(1)) if match else 0.0
|
|
|
|
| 146 |
if 'Rent' in c or '/hr' in c:
|
| 147 |
target_col = c
|
| 148 |
break
|
| 149 |
+
|
| 150 |
if target_col:
|
| 151 |
df['Rent_Price_Num'] = df[target_col].apply(clean_currency)
|
| 152 |
+
|
| 153 |
return df
|
| 154 |
|
| 155 |
+
|
| 156 |
def process_llm_data(df):
    """Normalise a raw LLM price table for plotting.

    Column names are converted to stripped strings and, when a ``Date``
    column exists, its values are parsed to datetimes (unparseable entries
    become ``NaT``).

    Args:
        df: DataFrame read from the price file, or ``None``.

    Returns:
        The same DataFrame object, modified in place; an empty DataFrame is
        returned unchanged, and ``None`` yields a new empty DataFrame.
    """
    # The plotting helpers in this file accept None; do the same here
    # instead of failing on `None.empty`.
    if df is None:
        return pd.DataFrame()
    if df.empty:
        return df

    df.columns = [str(c).strip() for c in df.columns]

    if 'Date' in df.columns:
        df['Date'] = pd.to_datetime(df['Date'], errors='coerce')

    return df
|
| 165 |
|
| 166 |
# ==========================================
|
| 167 |
+
# 3. Plotting Logic
|
| 168 |
# ==========================================
|
| 169 |
|
| 170 |
+
|
| 171 |
def plot_gpu_trends(df):
    """Build a line chart of GPU rental price over time.

    Args:
        df: Processed GPU table. It must contain ``Date`` and
            ``Rent_Price_Num`` columns to be plottable.

    Returns:
        A Plotly figure with one line per chip, or ``None`` when ``df`` is
        ``None``/empty, a required column is missing, or no row has both a
        date and a price.
    """
    required = ['Date', 'Rent_Price_Num']
    # 'Date' must be checked too: dropna(subset=...) raises KeyError for a
    # column that does not exist.
    if df is None or df.empty or any(col not in df.columns for col in required):
        return None

    plot_df = df.dropna(subset=required)
    if plot_df.empty:
        return None

    # Prefer an explicit 'Chip' column; otherwise fall back to the second
    # column. Both required columns exist, so index 1 is always valid.
    chip_col = 'Chip' if 'Chip' in df.columns else df.columns[1]

    fig = px.line(plot_df,
                  x='Date',
                  y='Rent_Price_Num',
                  color=chip_col,
                  title='GPU Cloud Rental Price Trends ($/hr)',
                  labels={
                      'Rent_Price_Num': 'Price ($/hr)',
                      'Date': 'Date'
                  },
                  markers=True)
    return fig
|
| 192 |
|
| 193 |
def plot_llm_trends(df):
|
| 194 |
+
"""Plot trends for all columns, no selection needed anymore"""
|
| 195 |
if df is None or df.empty:
|
| 196 |
return None
|
| 197 |
+
|
| 198 |
+
# Automatically select all columns except Date
|
| 199 |
value_vars = [c for c in df.columns if c != 'Date']
|
| 200 |
if not value_vars:
|
| 201 |
return None
|
| 202 |
|
| 203 |
plot_df = df[['Date'] + value_vars].copy().dropna(subset=['Date'])
|
| 204 |
+
|
| 205 |
# Melt
|
| 206 |
df_long = plot_df.melt(id_vars=['Date'], var_name='Model', value_name='Price')
|
| 207 |
+
|
| 208 |
fig = px.line(
|
| 209 |
df_long,
|
| 210 |
x='Date',
|
|
|
|
| 217 |
return fig
|
| 218 |
|
| 219 |
# ==========================================
|
| 220 |
+
# 4. Gradio Interface
|
| 221 |
# ==========================================
|
| 222 |
|
| 223 |
+
DEFAULT_GPU_FILE = "gpu_price_history.csv"
|
| 224 |
DEFAULT_LLM_FILE = "llm_price_trends.csv"
|
| 225 |
|
| 226 |
def load_gpu_pipeline():
|
|
|
|
| 236 |
# --- UI Definition ---
|
| 237 |
with gr.Blocks(title="AI Price Tracker") as demo:
|
| 238 |
gr.Markdown("## 📊 AI Compute & Model Price Trends")
|
| 239 |
+
|
| 240 |
with gr.Tabs():
|
| 241 |
# GPU Tab
|
| 242 |
with gr.TabItem("GPU Prices"):
|
|
|
|
| 250 |
# LLM Tab (Updated: No Filter)
|
| 251 |
with gr.TabItem("LLM Prices"):
|
| 252 |
with gr.Row():
|
| 253 |
+
# Display chart directly, no column division
|
| 254 |
with gr.Column(scale=1):
|
| 255 |
llm_plot = gr.Plot(label="Price Trend")
|
| 256 |
+
|
| 257 |
with gr.Row():
|
| 258 |
with gr.Accordion("Data Preview", open=False):
|
| 259 |
llm_table = gr.DataFrame()
|
|
|
|
| 262 |
def init_on_load():
|
| 263 |
# Load GPU
|
| 264 |
g_df, g_fig = load_gpu_pipeline()
|
| 265 |
+
|
| 266 |
# Load LLM (No checkbox needed anymore)
|
| 267 |
l_df, l_fig = load_llm_pipeline()
|
| 268 |
+
|
| 269 |
return (
|
| 270 |
g_fig, # gpu_plot
|
| 271 |
g_df, # gpu_table
|
|
|
|
| 273 |
l_df # llm_table
|
| 274 |
)
|
| 275 |
|
| 276 |
+
# Bind load event
|
| 277 |
demo.load(
|
| 278 |
+
init_on_load,
|
| 279 |
+
inputs=None,
|
| 280 |
outputs=[
|
| 281 |
+
gpu_plot,
|
| 282 |
+
gpu_table,
|
| 283 |
+
llm_plot,
|
| 284 |
llm_table
|
| 285 |
]
|
| 286 |
)
|
| 287 |
|
| 288 |
if __name__ == "__main__":
|
| 289 |
+
demo.launch(share=True)
|
convert_llm_price.py
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
|
| 3 |
+
# Default input/output locations; both can be overridden on the command line:
#   python convert_llm_price.py [EXCEL_PATH [CSV_PATH]]
excel_file = "/Users/yukino/Downloads/May_change_the_world/Compute-and-Token-Watch/llm_price_trends.xlsx"
csv_file = "/Users/yukino/Downloads/May_change_the_world/Compute-and-Token-Watch/llm_price_trends.csv"

# Column layout of the output CSV. Per the original author's note these are
# meant to match the headers used by llm_token_tracker.py.
target_columns = [
    "Date",
    "Closed-Reasoning (o1/Opus) Input",
    "Closed-Reasoning (o1/Opus) Output",
    "Closed-Flagship (GPT4o/Sonnet) Input",
    "Closed-Flagship (GPT4o/Sonnet) Output",
    "Closed-Economy (Mini/Flash) Input",
    "Closed-Economy (Mini/Flash) Output",
    "Open-Large (>100B/V3) Input",
    "Open-Large (>100B/V3) Output",
    "Open-Medium (20-100B) Input",
    "Open-Medium (20-100B) Output",
    "Open-Small (<20B) Input",
    "Open-Small (<20B) Output",
]


def align_columns(df, columns=None):
    """Return a copy of ``df`` whose columns are exactly ``columns``.

    When the column counts match, the headers are replaced positionally
    (this assumes the spreadsheet already uses the target order). Otherwise
    a warning is printed, columns are matched by exact name, and targets
    that are absent from ``df`` are filled with NaN.

    Args:
        df: DataFrame read from the spreadsheet.
        columns: Desired column names; defaults to ``target_columns``.
    """
    columns = list(target_columns if columns is None else columns)

    if len(df.columns) == len(columns):
        renamed = df.copy()
        renamed.columns = columns
        return renamed

    print("Warning: Number of columns in XLSX does not match expected target columns. Attempting best-effort rename.")
    # reindex keeps exact-name matches and creates NaN columns for the rest.
    return df.reindex(columns=columns)


def convert(excel_path=None, csv_path=None):
    """Convert the price spreadsheet to CSV with the target column layout.

    Args:
        excel_path: Source .xlsx file; defaults to ``excel_file``.
        csv_path: Destination .csv file; defaults to ``csv_file``.

    Errors are reported on stdout rather than raised, as in the original
    script.
    """
    excel_path = excel_file if excel_path is None else excel_path
    csv_path = csv_file if csv_path is None else csv_path

    try:
        df = align_columns(pd.read_excel(excel_path))
        df.to_csv(csv_path, index=False)
        print(f"Successfully converted {excel_path} to {csv_path}")
    except FileNotFoundError:
        print(f"Error: {excel_path} not found.")
    except Exception as e:
        print(f"An error occurred during conversion: {e}")


if __name__ == "__main__":
    import sys

    # Up to two optional positional arguments: input path, output path.
    convert(*sys.argv[1:3])
|
crontab_job
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
0 1 * * * /app/gpu_info_collector.sh >> /app/cron.log 2>&1
|
gpu_info_collector.sh
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
# Collection job: refresh the LLM token price CSV and the GPU price CSV.
# Scheduled through crontab_job and also started on demand by app.py.

# The trackers are referenced by relative path and write their CSV files to
# "./", so always run from the directory that contains this script (cron
# does not start jobs in /app).
cd "$(dirname "$0")" || exit 1

# cron supplies only a minimal PATH; the official python images install the
# interpreter under /usr/local/bin.
export PATH="/usr/local/bin:$PATH"

python3 llm_token_tracker.py
python3 gpu_price_tracker.py
|
gpu_price_history.csv
CHANGED
|
Binary files a/gpu_price_history.csv and b/gpu_price_history.csv differ
|
|
|
gpu_price_tracker.py
ADDED
|
@@ -0,0 +1,200 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import requests
|
| 2 |
+
from bs4 import BeautifulSoup
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import random
|
| 5 |
+
import time
|
| 6 |
+
import os
|
| 7 |
+
import re
|
| 8 |
+
import statistics
|
| 9 |
+
from datetime import datetime
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class RobustHardwareTracker:
    """Collect GPU cloud-rental and hardware resale prices by scraping.

    Rental prices are read from getdeploying.com reference pages and
    hardware prices from an eBay sold-listings search. Preset fallback
    prices are used when the hardware lookup yields nothing.
    """

    # "$2.00", "$2" and "$1,000.00". The number must start with a digit so a
    # stray comma can never be captured and break float().
    _CLOUD_PRICE_RE = re.compile(r"\$([0-9][0-9,]*(?:\.[0-9]+)?)")
    _EBAY_PRICE_RE = re.compile(r"([0-9][0-9,]*(?:\.[0-9]{2})?)")

    def __init__(self):
        # Preset fallback prices, used when the crawler is completely blocked
        self.fallback_prices = {
            "H100": "$28,500 - $32,000",
            "V100": "$350 - $650",
            "B300": "Contact Sales (Q4 2025)",
        }

        # Browser-like request headers (including Accept / Accept-Language);
        # one set is picked at random per request.
        self.headers_list = [{
            "User-Agent":
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
            "Accept":
            "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
            "Accept-Language": "en-US,en;q=0.9",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
            "Upgrade-Insecure-Requests": "1"
        }, {
            "User-Agent":
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15",
            "Accept":
            "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "Accept-Language": "en-US,en;q=0.9",
            "Connection": "keep-alive",
        }]

    def _fallback_estimate(self, chip):
        """Return the preset price for ``chip`` marked "(Est)"; "N/A (Est)" if unknown."""
        return f"{self.fallback_prices.get(chip, 'N/A')} (Est)"

    @classmethod
    def _parse_ebay_prices(cls, texts, limit=10):
        """Extract up to ``limit`` prices above $100 from eBay price-tag texts."""
        prices = []
        for text in texts:
            # Exclude the hidden placeholder (Shop on eBay) and empty tags
            if not text or "Shop on eBay" in text:
                continue

            # Price range ("X to Y"): keep only the lower bound
            low_end = text.split("to")[0]

            match = cls._EBAY_PRICE_RE.search(low_end)
            if match:
                val = float(match.group(1).replace(",", ""))
                # Filter out items at or below $100 (usually accessories,
                # manuals, or bare cooling fans)
                if val > 100:
                    prices.append(val)

            # Stop once enough valid samples are collected
            if len(prices) >= limit:
                break
        return prices

    def get_cloud_price(self, chip):
        """Get the lowest listed cloud rental price for ``chip``.

        Returns:
            "$X.XX/hr" for the cheapest table row; "N/A" for an unknown chip
            or a non-200 response; "Sold Out" when no price is found on the
            page; "Check Provider" if the request or parsing raised.
        """
        urls = {
            "B300":
            "https://getdeploying.com/reference/cloud-gpu/nvidia-dgx-b300",
            "H100": "https://getdeploying.com/reference/cloud-gpu/nvidia-h100",
            "V100": "https://getdeploying.com/reference/cloud-gpu/nvidia-v100",
        }

        url = urls.get(chip)
        if not url:
            return "N/A"

        try:
            h = random.choice(self.headers_list)
            resp = requests.get(url, headers=h, timeout=15)

            if resp.status_code != 200:
                return "N/A"

            soup = BeautifulSoup(resp.text, 'html.parser')

            # Parse the first table on the page: one price per row
            table = soup.find("table")
            if table:
                prices = []
                for row in table.find_all("tr"):
                    match = self._CLOUD_PRICE_RE.search(row.get_text())
                    if match:
                        prices.append(
                            float(match.group(1).replace(",", "")))
                if prices:
                    return f"${min(prices):.2f}/hr"

            return "Sold Out"
        except Exception as e:
            print(str(e))
            return "Check Provider"

    def get_hardware_price(self, chip, search_query):
        """Get the median eBay sold price for ``search_query``.

        Returns:
            The fixed preset text for "B300" (no lookup is attempted);
            otherwise "$X,XXX.XX" (median of up to 10 sold listings above
            $100), or the preset price suffixed with " (Est)" when the
            lookup fails or finds nothing.
        """
        if chip == "B300":
            return self.fallback_prices["B300"]

        url = "https://www.ebay.com/sch/i.html"
        params = {
            "_nkw": search_query,
            "LH_Sold": "1",
            "LH_Complete": "1",
            "rt": "nc"
        }

        try:
            # Add random delay to simulate human operation
            time.sleep(random.uniform(2.0, 4.0))
            h = random.choice(self.headers_list)
            resp = requests.get(url, params=params, headers=h, timeout=15)

            # An error/blocked page carries no listings worth parsing
            if resp.status_code != 200:
                return self._fallback_estimate(chip)

            soup = BeautifulSoup(resp.text, 'html.parser')
            prices = self._parse_ebay_prices(
                tag.get_text(strip=True)
                for tag in soup.select(".s-item__price"))

            if not prices:
                return self._fallback_estimate(chip)

            median_val = statistics.median(prices)
            return f"${median_val:,.2f}"

        except Exception as e:
            print(str(e))
            # .get()-based fallback: an unknown chip must not raise KeyError
            # from inside the error handler
            return self._fallback_estimate(chip)

    def collect_data(self):
        """Fetch rental and hardware prices for every tracked chip.

        Returns:
            A list of dicts with keys "Date", "Chip", "Cloud Rent (/hr)" and
            "Hardware Price"; all rows share one timestamp.
        """
        inventory = [
            {
                "Code": "B300",
                "Name": "Blackwell B300",
                "Query": "NVIDIA B300 GPU"
            },
            {
                "Code": "H100",
                "Name": "Hopper H100",
                "Query": "NVIDIA H100 PCIe 80GB"
            },
            {
                "Code": "V100",
                "Name": "Volta V100",
                "Query": "NVIDIA Tesla V100 16GB PCIe"
            },
        ]

        results = []
        current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        print(f"Fetching data [{current_time}]...")
        for item in inventory:
            rent = self.get_cloud_price(item["Code"])
            buy = self.get_hardware_price(item["Code"], item["Query"])

            results.append({
                "Date": current_time,
                "Chip": item["Name"],
                "Cloud Rent (/hr)": rent,
                "Hardware Price": buy
            })
            print(f"  -> Fetched {item['Name']}")

        return results
|
| 180 |
+
|
| 181 |
+
def save_to_csv(new_data, filename="./gpu_price_history.csv"):
    """Append freshly collected rows to the price-history CSV.

    Args:
        new_data: List of row dicts (as produced by ``collect_data``).
        filename: Path of the history CSV; created if it does not exist.

    If the existing file cannot be read it is moved aside to
    ``<filename>.bak`` before a new file is started, so earlier history is
    never silently overwritten. Nothing is written when ``new_data`` is
    empty.
    """
    new_df = pd.DataFrame(new_data)

    # Writing an empty frame would create a CSV that the next run cannot
    # parse, so skip the write altogether.
    if new_df.empty:
        print("No new data to save")
        return

    if not os.path.exists(filename):
        new_df.to_csv(filename, index=False)
        print(f"New file created at {filename}")
        return

    try:
        existing_df = pd.read_csv(filename)
    except (ValueError, OSError) as e:
        # pandas' EmptyDataError/ParserError and UnicodeDecodeError are all
        # ValueError subclasses. Keep the unreadable file for inspection.
        backup = f"{filename}.bak"
        print(f"Error reading existing CSV: {e}")
        os.replace(filename, backup)
        new_df.to_csv(filename, index=False)
        print(f"Previous file moved to {backup}; new file created at {filename}")
        return

    combined_df = pd.concat([existing_df, new_df], ignore_index=True)
    combined_df.to_csv(filename, index=False)
    print(f"Successfully appended data to {filename}")
|
| 196 |
+
|
| 197 |
+
if __name__ == "__main__":
    # Script entry point: collect the current prices once and append them
    # to the default history CSV.
    save_to_csv(RobustHardwareTracker().collect_data())
|
llm_price_trends.csv
CHANGED
|
Binary files a/llm_price_trends.csv and b/llm_price_trends.csv differ
|
|
|
llm_token_tracker.py
ADDED
|
@@ -0,0 +1,165 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import requests
|
| 2 |
+
import pandas as pd
|
| 3 |
+
from datetime import datetime
|
| 4 |
+
import os
|
| 5 |
+
import re
|
| 6 |
+
|
| 7 |
+
class LLMPriceMonitor:
    """Track average LLM token prices per model tier from the OpenRouter catalogue.

    Workflow: ``fetch_models`` downloads the model list,
    ``categorize_and_calculate`` buckets every priced model into a
    closed-source tier (by keyword) or an open-source tier (by parameter
    count) and averages the prices, and ``save_data`` appends the resulting
    row to a CSV file.
    """

    # Explicit parameter count inside an id/name: "70b", "1.5b", or the
    # mixture-of-experts form "8x7b" (expert count x per-expert size).
    # The trailing look-ahead rejects tokens such as "4bit" or "2beta".
    _SIZE_PATTERN = re.compile(r"(?:(\d+)x)?(\d+(?:\.\d+)?)b(?![a-z0-9])")

    # Substrings that disqualify a model from the flagship tier even when a
    # flagship keyword matches (e.g. a "-mini" variant of a flagship family).
    _LIGHTWEIGHT_MARKERS = ("mini", "flash", "haiku")

    # CSV column prefixes, in the order the columns are written.
    _CATEGORIES = (
        "Closed-Reasoning",
        "Closed-Flagship",
        "Closed-Economy",
        "Open-Large (>100B)",
        "Open-Medium (20-100B)",
        "Open-Small (<20B)",
    )

    def __init__(self):
        self.api_url = "https://openrouter.ai/api/v1/models"
        self.file_name = "./llm_price_trends.csv"

        # Providers whose models are treated as closed-source.
        self.closed_providers = ["openai", "anthropic", "google"]

        # Keyword rules that assign a closed-source model id to a tier.
        self.closed_keywords = {
            "Reasoning (Reasoning/High-IQ)": ["gpt-5.2-pro", "claude-opus-4.6", "gemini-3.1-pro-preview"],
            "Flagship (General Flagship)": ["gpt-5.2", "claude-sonnet-4.6"],
            "Economy (Economic/Lightweight)": ["gpt-5-mini", "claude-haiku-4.5", "gemini-3-flash-preview"],
        }

    def fetch_models(self):
        """Fetch the current model list from the OpenRouter API.

        Returns:
            The list stored under the response's ``"data"`` key, or an empty
            list when the request fails, the status is not 200, or the
            payload has an unexpected shape.
        """
        print(f"[{datetime.now().strftime('%H:%M:%S')}] Connecting to OpenRouter API...")
        try:
            resp = requests.get(self.api_url, timeout=20)
            if resp.status_code == 200:
                payload = resp.json()
                if isinstance(payload, dict):
                    # ``"data": null`` must not leak ``None`` to callers.
                    return payload.get("data") or []
            print("API request failed")
            return []
        except (requests.RequestException, ValueError) as e:
            # ValueError covers a body that is not valid JSON.
            print(f"Network error: {e}")
            return []

    def parse_parameter_size(self, model_id, model_name):
        """Extract the parameter count, in billions, from a model id/name.

        An explicit size in the text wins (``70b`` -> 70, ``1.5b`` -> 1.5).
        Mixture-of-experts names such as ``8x7b`` are approximated as
        experts x per-expert size (56). Only when no explicit size is present
        are the DeepSeek aliases applied, so a distilled variant such as
        ``deepseek-r1-distill-llama-70b`` is reported as 70 rather than 671.

        Args:
            model_id: Model identifier, e.g. ``"meta-llama/llama-3.1-70b-instruct"``.
            model_name: Human-readable model name.

        Returns:
            The size in billions (an ``int`` when whole, otherwise a
            ``float``), or 0 when the size cannot be determined.
        """
        lowered_id = model_id.lower()
        text = (model_id + " " + model_name).lower()

        match = self._SIZE_PATTERN.search(text)
        if match:
            experts = int(match.group(1)) if match.group(1) else 1
            size = experts * float(match.group(2))
            return int(size) if size.is_integer() else size

        # Alias mappings for ids that carry no explicit size.
        if "deepseek-v3" in lowered_id or "deepseek-chat" in lowered_id:
            return 671  # DeepSeek V3 is MoE 671B
        if "deepseek-r1" in lowered_id:
            return 671
        return 0

    @staticmethod
    def _extract_prices(model):
        """Return ``(input, output)`` prices in $/1M tokens, or None to skip."""
        try:
            p_in = float(model["pricing"]["prompt"]) * 1_000_000
            p_out = float(model["pricing"]["completion"]) * 1_000_000
        except (KeyError, TypeError, ValueError):
            # Missing or malformed pricing: ignore this model, keep the run alive.
            return None

        # Drop free/test models (they would drag the average down) and
        # abnormally expensive ones. Written as a positive range check so a
        # NaN price is rejected too.
        if not (0 < p_in <= 200):
            return None
        # A negative output price cannot be averaged meaningfully
        # (presumably a "variable pricing" sentinel -- confirm against the API).
        if not (p_out >= 0):
            return None
        return p_in, p_out

    def _closed_category(self, mid):
        """Map a closed-source model id to its tier, or None if no rule matches."""
        rules = self.closed_keywords
        if any(k in mid for k in rules["Reasoning (Reasoning/High-IQ)"]):
            return "Closed-Reasoning"
        if any(k in mid for k in rules["Economy (Economic/Lightweight)"]):
            return "Closed-Economy"
        if any(k in mid for k in rules["Flagship (General Flagship)"]):
            # Keep lightweight variants of a flagship family out of this tier.
            if not any(x in mid for x in self._LIGHTWEIGHT_MARKERS):
                return "Closed-Flagship"
        return None

    @staticmethod
    def _open_category(size):
        """Map a parameter count (billions) to an open-source tier, or None."""
        if size <= 0:
            return None
        if size < 20:
            return "Open-Small (<20B)"
        if size <= 100:
            return "Open-Medium (20-100B)"
        return "Open-Large (>100B)"

    def categorize_and_calculate(self, models):
        """Bucket models into tiers and compute the average price per tier.

        Args:
            models: Iterable of model dicts as returned by ``fetch_models``.
                Each is expected to carry ``id``, ``name`` and
                ``pricing.prompt`` / ``pricing.completion`` (price per
                token); entries lacking usable pricing are skipped.

        Returns:
            A dict with a ``"Date"`` timestamp plus ``"<tier> Input"`` and
            ``"<tier> Output"`` averages in $/1M tokens, rounded to four
            decimals. A tier with no matching models is reported as 0.
        """
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M")
        # tier -> (input prices, output prices)
        buckets = {category: ([], []) for category in self._CATEGORIES}

        print("Categorizing and cleaning data...")

        for model in models:
            prices = self._extract_prices(model)
            if prices is None:
                continue

            # ``or ""`` guards against an explicit null id/name in the payload.
            mid = str(model.get("id") or "").lower()
            name = str(model.get("name") or "").lower()

            if any(p in mid for p in self.closed_providers):
                category = self._closed_category(mid)
            else:
                category = self._open_category(self.parse_parameter_size(mid, name))

            if category is None:
                continue
            buckets[category][0].append(prices[0])
            buckets[category][1].append(prices[1])

        final_row = {"Date": timestamp}

        print("\n--- Today's LLM Average Prices ($/1M Tokens) ---")
        print(f"{'Category':<25} | {'Input':<8} | {'Output':<8}")
        print("-" * 50)

        for display_name in self._CATEGORIES:
            list_in, list_out = buckets[display_name]
            val_in = sum(list_in) / len(list_in) if list_in else 0
            val_out = sum(list_out) / len(list_out) if list_out else 0
            final_row[f"{display_name} Input"] = round(val_in, 4)
            final_row[f"{display_name} Output"] = round(val_out, 4)
            print(f"{display_name:<25} | ${val_in:<7.3f} | ${val_out:<7.3f}")

        return final_row

    def save_data(self, row_data):
        """Append one row to the CSV at ``self.file_name``, creating it if needed.

        An existing but empty file is treated like a missing one; otherwise
        ``pd.read_csv`` would raise on every run and no row would ever be
        written.

        Args:
            row_data: Mapping of column name to value for the new row.
        """
        df = pd.DataFrame([row_data])

        existing = None
        if os.path.exists(self.file_name):
            try:
                existing = pd.read_csv(self.file_name)
            except pd.errors.EmptyDataError:
                # Zero-byte / header-less file: start it fresh below.
                existing = None
            except (OSError, ValueError) as e:
                print(f"Error writing to file (please close the CSV file): {e}")
                return

        if existing is None:
            df.to_csv(self.file_name, index=False)
            print(f"\nNew file created: {self.file_name}")
            return

        try:
            # concat aligns columns by name, so a changed column set is tolerated.
            combined = pd.concat([existing, df], ignore_index=True)
            combined.to_csv(self.file_name, index=False)
            print(f"\nSuccessfully appended data to: {self.file_name}")
        except (OSError, ValueError) as e:
            print(f"Error writing to file (please close the CSV file): {e}")
|
| 159 |
+
|
| 160 |
+
if __name__ == "__main__":
    # Script entry point: fetch the catalogue, aggregate it, persist one row.
    monitor = LLMPriceMonitor()
    fetched_models = monitor.fetch_models()
    # An empty result means the fetch failed (already reported); write nothing.
    if fetched_models:
        monitor.save_data(monitor.categorize_and_calculate(fetched_models))
|