File size: 4,881 Bytes
ddd502d
122ab90
 
ddd502d
 
 
 
122ab90
 
 
ddd502d
 
 
 
 
122ab90
 
ddd502d
122ab90
 
ddd502d
 
 
 
 
122ab90
 
ddd502d
122ab90
ddd502d
 
 
 
 
 
 
 
 
 
 
122ab90
ddd502d
 
 
 
 
 
 
 
 
 
 
 
 
122ab90
ddd502d
 
 
 
122ab90
ddd502d
122ab90
ddd502d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122ab90
 
 
 
ddd502d
122ab90
 
ddd502d
122ab90
 
ddd502d
122ab90
ddd502d
 
 
 
 
 
 
122ab90
ddd502d
 
122ab90
ddd502d
 
 
 
122ab90
ddd502d
122ab90
ddd502d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
"""Hidden Gems Tool - Find undervalued Hugging Face models."""

import os
import json
from typing import Optional
from urllib.request import urlopen, Request
from urllib.error import HTTPError


def find_hidden_gems(
    limit: int = 100,
    min_downloads: int = 100,
    top: int = 20,
    pipeline_tag: Optional[str] = None,
    sort_by: str = "ratio"
) -> str:
    """
    Find hidden gem models with high likes-to-downloads ratios.

    Fetches a page of models from the Hugging Face models API, keeps those
    that report both likes and downloads and meet the download threshold,
    scores each one as likes / downloads, and returns the best ``top``.

    Args:
        limit: Number of models to fetch from API (default: 100). Must be >= 1.
        min_downloads: Minimum downloads to filter out very new models (default: 100)
        top: Number of top results to return (default: 20). Values below 0
            are treated as 0.
        pipeline_tag: Filter by pipeline tag like "text-generation", "image-to-text" (optional)
        sort_by: Sort results by "ratio" (default), "likes", "downloads", or "trending".
            Any other value falls back to "ratio".

    Returns:
        JSON string. On success it holds "count" (models that passed the
        filters), "showing" (models returned), "filters" (echo of the
        arguments) and "gems" (the ranked models). On failure it holds a
        single "error" key; this function does not raise for API or
        validation problems.
    """
    # Function-scope import so the block does not depend on the file's import header.
    from urllib.parse import urlencode

    # Validate before touching the network; arguments may arrive as strings
    # (e.g. "50"), so coerce rather than compare raw values.
    try:
        limit = int(limit)
        top = int(top)
    except (TypeError, ValueError):
        return json.dumps({"error": "limit and top must be integers"})
    if limit < 1:
        return json.dumps({"error": "limit must be a positive integer"})

    # Build API URL; urlencode escapes characters such as '&' or spaces in
    # pipeline_tag that would otherwise corrupt the query string.
    query = {"limit": limit}
    if pipeline_tag:
        query["pipeline_tag"] = pipeline_tag
    api_url = f"https://huggingface.co/api/models?{urlencode(query)}"

    # Fetch models (authenticated when HF_TOKEN is set in the environment)
    headers = {}
    token = os.environ.get("HF_TOKEN")
    if token:
        headers["Authorization"] = f"Bearer {token}"

    try:
        req = Request(api_url, headers=headers)
        with urlopen(req, timeout=60) as response:
            models = json.loads(response.read().decode("utf-8"))
    except HTTPError as e:
        return json.dumps({"error": f"API error: {e.code} - {e.reason}"})
    except Exception as e:
        return json.dumps({"error": f"Failed to fetch models: {str(e)}"})

    # A JSON object (or anything else) instead of a list would make the loop
    # below iterate over keys and crash; report it in the usual error shape.
    if not isinstance(models, list):
        return json.dumps({"error": "Unexpected API response: expected a list of models"})

    # Calculate hidden gem scores
    results = []
    for model in models:
        if not isinstance(model, dict):
            continue

        likes = model.get("likes")
        downloads = model.get("downloads")

        if likes is None or downloads is None:
            continue
        if downloads < min_downloads:
            continue

        # downloads can still be 0 when min_downloads <= 0; avoid dividing by it.
        ratio = likes / downloads if downloads > 0 else 0

        results.append({
            "id": model.get("id"),
            "likes": likes,
            "downloads": downloads,
            "ratio": round(ratio, 6),
            "pipeline_tag": model.get("pipeline_tag") or "unknown",
            "library_name": model.get("library_name") or "unknown",
            "createdAt": model.get("createdAt") or "unknown",
            "trendingScore": model.get("trendingScore") or 0,
            "tags": model.get("tags", [])
        })

    # Sort results; unrecognised sort_by values fall back to the ratio.
    sort_key = {
        "likes": "likes",
        "downloads": "downloads",
        "trending": "trendingScore"
    }.get(sort_by, "ratio")

    results.sort(key=lambda x: x[sort_key], reverse=True)

    # Take top N. Clamp at 0: a negative slice bound would drop entries from
    # the end of the list instead of returning none.
    top_results = results[:max(top, 0)]

    return json.dumps({
        "count": len(results),
        "showing": len(top_results),
        "filters": {
            "min_downloads": min_downloads,
            "pipeline_tag": pipeline_tag,
            "sort_by": sort_by
        },
        "gems": top_results
    }, indent=2)


def get_model_details(model_id: str) -> str:
    """
    Get detailed information about a specific Hugging Face model.

    Args:
        model_id: The model ID (e.g., "microsoft/DialoGPT-medium")

    Returns:
        JSON string with detailed model information, including a
        "hidden_gem_score" (likes / downloads, 0 when there are no
        downloads) and the names of at most the first 10 repository files.
        On failure the JSON holds a single "error" key; this function does
        not raise for API or validation problems.
    """
    # Function-scope import so the block does not depend on the file's import header.
    from urllib.parse import quote

    # An empty ID would hit the model *list* endpoint and return a list,
    # so reject it before making any request.
    if not isinstance(model_id, str) or not model_id.strip():
        return json.dumps({"error": "model_id must be a non-empty string"})
    model_id = model_id.strip()

    # Escape the ID for use as a URL path; '/' stays literal because it
    # separates the owner from the model name.
    api_url = f"https://huggingface.co/api/models/{quote(model_id, safe='/')}"

    # Authenticated when HF_TOKEN is set in the environment
    headers = {}
    token = os.environ.get("HF_TOKEN")
    if token:
        headers["Authorization"] = f"Bearer {token}"

    try:
        req = Request(api_url, headers=headers)
        with urlopen(req, timeout=30) as response:
            model = json.loads(response.read().decode("utf-8"))
    except HTTPError as e:
        if e.code == 404:
            return json.dumps({"error": f"Model '{model_id}' not found"})
        return json.dumps({"error": f"API error: {e.code} - {e.reason}"})
    except Exception as e:
        return json.dumps({"error": f"Failed to fetch model: {str(e)}"})

    if not isinstance(model, dict):
        return json.dumps({"error": "Unexpected API response: expected a model object"})

    # Calculate hidden gem score. `or 0` also covers keys that are present
    # with a null value, which .get(key, 0) would pass through as None.
    likes = model.get("likes") or 0
    downloads = model.get("downloads") or 0
    ratio = likes / downloads if downloads > 0 else 0

    # Only the first 10 file names are reported; a null "siblings" is treated as empty.
    siblings = model.get("siblings") or []
    file_names = [s.get("rfilename") for s in siblings[:10] if isinstance(s, dict)]

    result = {
        "id": model.get("id"),
        "likes": likes,
        "downloads": downloads,
        "hidden_gem_score": round(ratio, 6),
        "pipeline_tag": model.get("pipeline_tag") or "unknown",
        "library_name": model.get("library_name") or "unknown",
        "tags": model.get("tags", []),
        "createdAt": model.get("createdAt"),
        "lastModified": model.get("lastModified"),
        "cardExists": model.get("cardExists", False),
        "widgetData": model.get("widgetData", []),
        "siblings": file_names,
        # NOTE(review): despite its name this field carries the model card's
        # "tags" list, not free text. Kept as-is so existing consumers of the
        # output are unaffected — confirm whether that is intended.
        "description": (model.get("cardData") or {}).get("tags", [])
    }

    return json.dumps(result, indent=2)