Spaces:

evalstate
/

hidden-gems

Runtime error

File size: 4,881 Bytes

ddd502d
122ab90
 
ddd502d
 
 
 
122ab90
 
 
ddd502d
 
 
 
 
122ab90
 
ddd502d
122ab90
 
ddd502d
 
 
 
 
122ab90
 
ddd502d
122ab90
ddd502d
 
 
 
 
 
 
 
 
 
 
122ab90
ddd502d
 
 
 
 
 
 
 
 
 
 
 
 
122ab90
ddd502d
 
 
 
122ab90
ddd502d
122ab90
ddd502d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122ab90
 
 
 
ddd502d
122ab90
 
ddd502d
122ab90
 
ddd502d
122ab90
ddd502d
 
 
 
 
 
 
122ab90
ddd502d
 
122ab90
ddd502d
 
 
 
122ab90
ddd502d
122ab90
ddd502d

"""Hidden Gems Tool - Find undervalued Hugging Face models."""

import json
import os
from typing import Optional
from urllib.error import HTTPError
from urllib.parse import quote, urlencode
from urllib.request import urlopen, Request


def find_hidden_gems(
    limit: int = 100,
    min_downloads: int = 100,
    top: int = 20,
    pipeline_tag: Optional[str] = None,
    sort_by: str = "ratio"
) -> str:
    """
    Find hidden gem models with high likes-to-downloads ratios.

    Args:
        limit: Number of models to fetch from API (default: 100)
        min_downloads: Minimum downloads to filter out very new models (default: 100)
        top: Number of top results to return (default: 20); values below
            zero are treated as zero
        pipeline_tag: Filter by pipeline tag like "text-generation", "image-to-text" (optional)
        sort_by: Sort results by "ratio" (default), "likes", "downloads", or
            "trending"; any other value falls back to "ratio"

    Returns:
        JSON string with list of hidden gem models ranked by score, or a JSON
        object with a single "error" key when the request fails or the
        response is not a list.
    """
    # Build the query with urlencode so a tag containing spaces, "&" or "#"
    # cannot corrupt the URL or smuggle in extra query parameters.
    query = {"limit": limit}
    if pipeline_tag:
        query["pipeline_tag"] = pipeline_tag
    api_url = f"https://huggingface.co/api/models?{urlencode(query)}"

    # Authenticate only when a token is configured in the environment.
    headers = {}
    token = os.environ.get("HF_TOKEN")
    if token:
        headers["Authorization"] = f"Bearer {token}"

    # This function reports every failure as a JSON error instead of raising,
    # so the broad handler below is deliberate.
    try:
        req = Request(api_url, headers=headers)
        with urlopen(req, timeout=60) as response:
            models = json.loads(response.read().decode("utf-8"))
    except HTTPError as e:
        return json.dumps({"error": f"API error: {e.code} - {e.reason}"})
    except Exception as e:
        return json.dumps({"error": f"Failed to fetch models: {str(e)}"})

    # Iterating a non-list payload (e.g. a JSON object) would raise on
    # `.get`, so reject it explicitly.
    if not isinstance(models, list):
        return json.dumps(
            {"error": "Failed to fetch models: unexpected response format"}
        )

    # Calculate hidden gem scores
    results = []
    for model in models:
        if not isinstance(model, dict):
            continue

        likes = model.get("likes")
        downloads = model.get("downloads")

        # Entries without both counters cannot be scored.
        if likes is None or downloads is None:
            continue
        if downloads < min_downloads:
            continue

        # downloads can still be 0 here when min_downloads <= 0.
        ratio = likes / downloads if downloads > 0 else 0

        results.append({
            "id": model.get("id"),
            "likes": likes,
            "downloads": downloads,
            "ratio": round(ratio, 6),
            "pipeline_tag": model.get("pipeline_tag") or "unknown",
            "library_name": model.get("library_name") or "unknown",
            "createdAt": model.get("createdAt") or "unknown",
            "trendingScore": model.get("trendingScore") or 0,
            "tags": model.get("tags", [])
        })

    # Map the public sort option onto the result key; unknown options fall
    # back to the likes/downloads ratio.
    sort_key = {
        "likes": "likes",
        "downloads": "downloads",
        "trending": "trendingScore"
    }.get(sort_by, "ratio")

    results.sort(key=lambda x: x[sort_key], reverse=True)

    # Clamp so a negative `top` yields no rows rather than slicing from the end.
    top_results = results[:max(top, 0)]

    return json.dumps({
        "count": len(results),
        "showing": len(top_results),
        "filters": {
            "min_downloads": min_downloads,
            "pipeline_tag": pipeline_tag,
            "sort_by": sort_by
        },
        "gems": top_results
    }, indent=2)


def get_model_details(model_id: str) -> str:
    """
    Get detailed information about a specific Hugging Face model.

    Args:
        model_id: The model ID (e.g., "microsoft/DialoGPT-medium").
            Surrounding whitespace is ignored.

    Returns:
        JSON string with detailed model information, or a JSON object with a
        single "error" key when the ID is blank, the model is not found, or
        the request or response is otherwise unusable.
    """
    # A blank ID would otherwise request the bare /api/models/ path instead
    # of a single model, so reject it before touching the network.
    if not isinstance(model_id, str) or not model_id.strip():
        return json.dumps({"error": "Model ID must be a non-empty string"})
    model_id = model_id.strip()

    # Percent-encode the ID but keep "/" so "owner/name" stays a path.
    api_url = f"https://huggingface.co/api/models/{quote(model_id, safe='/')}"

    # Authenticate only when a token is configured in the environment.
    headers = {}
    token = os.environ.get("HF_TOKEN")
    if token:
        headers["Authorization"] = f"Bearer {token}"

    # This function reports every failure as a JSON error instead of raising,
    # so the broad handler below is deliberate.
    try:
        req = Request(api_url, headers=headers)
        with urlopen(req, timeout=30) as response:
            model = json.loads(response.read().decode("utf-8"))
    except HTTPError as e:
        if e.code == 404:
            return json.dumps({"error": f"Model '{model_id}' not found"})
        return json.dumps({"error": f"API error: {e.code} - {e.reason}"})
    except Exception as e:
        return json.dumps({"error": f"Failed to fetch model: {str(e)}"})

    # Everything below reads the payload with `.get`, which needs a dict.
    if not isinstance(model, dict):
        return json.dumps(
            {"error": "Failed to fetch model: unexpected response format"}
        )

    # Calculate hidden gem score. `or 0` also covers keys that are present
    # but null, which would otherwise break the comparison and division.
    likes = model.get("likes") or 0
    downloads = model.get("downloads") or 0
    ratio = likes / downloads if downloads > 0 else 0

    # Only the first 10 file names are reported; tolerate a null list and
    # skip entries that are not objects.
    siblings = model.get("siblings") or []
    sibling_names = [
        s.get("rfilename") for s in siblings[:10] if isinstance(s, dict)
    ]

    result = {
        "id": model.get("id"),
        "likes": likes,
        "downloads": downloads,
        "hidden_gem_score": round(ratio, 6),
        "pipeline_tag": model.get("pipeline_tag") or "unknown",
        "library_name": model.get("library_name") or "unknown",
        "tags": model.get("tags", []),
        "createdAt": model.get("createdAt"),
        "lastModified": model.get("lastModified"),
        "cardExists": model.get("cardExists", False),
        "widgetData": model.get("widgetData", []),
        "siblings": sibling_names,
        # NOTE: despite the key name, this carries the model card's tags;
        # the key is kept as-is for existing consumers.
        "description": (model.get("cardData") or {}).get("tags", [])
    }

    return json.dumps(result, indent=2)