nbroad committed on
Commit
f11953b
·
verified ·
1 Parent(s): 97931ff

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +40 -24
app.py CHANGED
@@ -1,20 +1,18 @@
 
 
 
 
 
 
 
1
  from fastapi import FastAPI, Request, BackgroundTasks
2
- from fastapi.responses import HTMLResponse
3
- from fastapi.staticfiles import StaticFiles
4
  from fastapi.templating import Jinja2Templates
5
- import requests
6
- from bs4 import BeautifulSoup
7
  import asyncio
8
  import aiohttp
9
- from datetime import datetime, timezone
10
- from typing import List, Dict, Optional
11
  import uvicorn
12
- import os
13
  import pandas as pd
14
  from datasets import Dataset, load_dataset
15
- from huggingface_hub import HfApi
16
- import logging
17
- from contextlib import asynccontextmanager
18
 
19
  # Configure logging
20
  logging.basicConfig(level=logging.INFO)
@@ -231,11 +229,14 @@ async def get_providers_data():
231
  # Sort by request count descending
232
  results.sort(key=lambda x: x["monthly_requests_int"], reverse=True)
233
 
234
- return {
235
  "providers": results,
236
  "last_updated": datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
237
  "total_providers": len(results)
238
- }
 
 
 
239
 
240
  @app.get("/api/providers/{provider}")
241
  async def get_provider_data(provider: str):
@@ -256,11 +257,13 @@ async def get_historical_data():
256
  """API endpoint to get historical data for line chart"""
257
  if not HF_TOKEN:
258
  logger.warning("No HF_TOKEN available for historical data")
259
- return {
260
  "error": "Historical data not available - no HF token",
261
  "historical_data": {},
262
  "message": "Historical data collection requires HuggingFace token"
263
- }
 
 
264
 
265
  try:
266
  # Load historical dataset
@@ -271,11 +274,13 @@ async def get_historical_data():
271
 
272
  if df.empty:
273
  logger.info("Dataset is empty - no historical data available yet")
274
- return {
275
  "historical_data": {},
276
  "message": "No historical data available yet. Data collection is running - check back in 30 minutes.",
277
  "last_updated": datetime.now().strftime('%Y-%m-%d %H:%M:%S')
278
- }
 
 
279
 
280
  # Group by timestamp and provider, get the latest entry for each timestamp-provider combo
281
  df['timestamp'] = pd.to_datetime(df['timestamp'])
@@ -324,14 +329,18 @@ async def get_historical_data():
324
  else:
325
  date_range = "No data"
326
 
327
- return {
 
328
  "historical_data": historical_data,
329
  "last_updated": datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
330
  "total_data_points": total_data_points,
331
  "data_range": date_range,
332
  "earliest_date": df_filtered['timestamp'].min().isoformat() if not df_filtered.empty else None,
333
  "latest_date": df_filtered['timestamp'].max().isoformat() if not df_filtered.empty else None
334
- }
 
 
 
335
 
336
  except Exception as e:
337
  logger.error(f"Error fetching historical data: {e}")
@@ -340,19 +349,23 @@ async def get_historical_data():
340
  logger.info("Dataset doesn't exist yet, triggering initial data collection")
341
  try:
342
  await collect_and_store_data()
343
- return {
344
  "historical_data": {},
345
  "message": "Dataset created! Historical data will appear after a few data collection cycles.",
346
  "last_updated": datetime.now().strftime('%Y-%m-%d %H:%M:%S')
347
- }
 
 
348
  except Exception as create_error:
349
  logger.error(f"Failed to create initial dataset: {create_error}")
350
 
351
- return {
352
  "error": f"Failed to fetch historical data: {str(e)}",
353
  "historical_data": {},
354
  "message": "Historical data temporarily unavailable"
355
- }
 
 
356
 
357
  @app.get("/api/models")
358
  async def get_provider_models_data():
@@ -400,14 +413,17 @@ async def get_provider_models_data():
400
  for provider in PROVIDERS:
401
  provider_totals[provider] = len(provider_models.get(provider, set()))
402
 
403
- return {
404
  "matrix": matrix,
405
  "providers": PROVIDERS,
406
  "provider_totals": provider_totals,
407
  "provider_mapping": PROVIDER_TO_INFERENCE_NAME,
408
  "total_models": len(all_models),
409
  "last_updated": datetime.now().strftime('%Y-%m-%d %H:%M:%S')
410
- }
 
 
 
411
 
412
  @app.post("/api/collect-now")
413
  async def trigger_data_collection(background_tasks: BackgroundTasks):
 
1
+ import os
2
+ import logging
3
+ from typing import List, Dict
4
+ from contextlib import asynccontextmanager
5
+ from datetime import datetime, timezone
6
+
7
+ from bs4 import BeautifulSoup
8
  from fastapi import FastAPI, Request, BackgroundTasks
 
 
9
  from fastapi.templating import Jinja2Templates
10
+ from fastapi.responses import JSONResponse
 
11
  import asyncio
12
  import aiohttp
 
 
13
  import uvicorn
 
14
  import pandas as pd
15
  from datasets import Dataset, load_dataset
 
 
 
16
 
17
  # Configure logging
18
  logging.basicConfig(level=logging.INFO)
 
229
  # Sort by request count descending
230
  results.sort(key=lambda x: x["monthly_requests_int"], reverse=True)
231
 
232
+ response = JSONResponse({
233
  "providers": results,
234
  "last_updated": datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
235
  "total_providers": len(results)
236
+ })
237
+ # Cache for 30 seconds to reduce load
238
+ response.headers["Cache-Control"] = "public, max-age=30"
239
+ return response
240
 
241
  @app.get("/api/providers/{provider}")
242
  async def get_provider_data(provider: str):
 
257
  """API endpoint to get historical data for line chart"""
258
  if not HF_TOKEN:
259
  logger.warning("No HF_TOKEN available for historical data")
260
+ response = JSONResponse({
261
  "error": "Historical data not available - no HF token",
262
  "historical_data": {},
263
  "message": "Historical data collection requires HuggingFace token"
264
+ })
265
+ response.headers["Cache-Control"] = "public, max-age=60"
266
+ return response
267
 
268
  try:
269
  # Load historical dataset
 
274
 
275
  if df.empty:
276
  logger.info("Dataset is empty - no historical data available yet")
277
+ response = JSONResponse({
278
  "historical_data": {},
279
  "message": "No historical data available yet. Data collection is running - check back in 30 minutes.",
280
  "last_updated": datetime.now().strftime('%Y-%m-%d %H:%M:%S')
281
+ })
282
+ response.headers["Cache-Control"] = "public, max-age=60"
283
+ return response
284
 
285
  # Group by timestamp and provider, get the latest entry for each timestamp-provider combo
286
  df['timestamp'] = pd.to_datetime(df['timestamp'])
 
329
  else:
330
  date_range = "No data"
331
 
332
+ from fastapi.responses import JSONResponse
333
+ response = JSONResponse({
334
  "historical_data": historical_data,
335
  "last_updated": datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
336
  "total_data_points": total_data_points,
337
  "data_range": date_range,
338
  "earliest_date": df_filtered['timestamp'].min().isoformat() if not df_filtered.empty else None,
339
  "latest_date": df_filtered['timestamp'].max().isoformat() if not df_filtered.empty else None
340
+ })
341
+ # Cache for 2 minutes since historical data doesn't change as frequently
342
+ response.headers["Cache-Control"] = "public, max-age=120"
343
+ return response
344
 
345
  except Exception as e:
346
  logger.error(f"Error fetching historical data: {e}")
 
349
  logger.info("Dataset doesn't exist yet, triggering initial data collection")
350
  try:
351
  await collect_and_store_data()
352
+ response = JSONResponse({
353
  "historical_data": {},
354
  "message": "Dataset created! Historical data will appear after a few data collection cycles.",
355
  "last_updated": datetime.now().strftime('%Y-%m-%d %H:%M:%S')
356
+ })
357
+ response.headers["Cache-Control"] = "public, max-age=60"
358
+ return response
359
  except Exception as create_error:
360
  logger.error(f"Failed to create initial dataset: {create_error}")
361
 
362
+ response = JSONResponse({
363
  "error": f"Failed to fetch historical data: {str(e)}",
364
  "historical_data": {},
365
  "message": "Historical data temporarily unavailable"
366
+ })
367
+ response.headers["Cache-Control"] = "public, max-age=30"
368
+ return response
369
 
370
  @app.get("/api/models")
371
  async def get_provider_models_data():
 
413
  for provider in PROVIDERS:
414
  provider_totals[provider] = len(provider_models.get(provider, set()))
415
 
416
+ response = JSONResponse({
417
  "matrix": matrix,
418
  "providers": PROVIDERS,
419
  "provider_totals": provider_totals,
420
  "provider_mapping": PROVIDER_TO_INFERENCE_NAME,
421
  "total_models": len(all_models),
422
  "last_updated": datetime.now().strftime('%Y-%m-%d %H:%M:%S')
423
+ })
424
+ # Cache for 5 minutes since models data changes infrequently
425
+ response.headers["Cache-Control"] = "public, max-age=300"
426
+ return response
427
 
428
  @app.post("/api/collect-now")
429
  async def trigger_data_collection(background_tasks: BackgroundTasks):