Spaces:
Sleeping
Sleeping
$P@D$3RV£R committed on
Commit ·
37e4ae7
1
Parent(s): 2ae9356
Implement HF Space visit count fetching from API and HTML scraping
Browse files
app.py
CHANGED
|
@@ -131,25 +131,64 @@ def save_stats(stats):
|
|
| 131 |
import traceback
|
| 132 |
traceback.print_exc()
|
| 133 |
|
| 134 |
-
def get_hf_space_visits():
|
| 135 |
-
"""Try to get HuggingFace Space visit count from the Space page"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
try:
|
| 137 |
-
space_url = "https://huggingface.co/spaces/
|
| 138 |
-
response = requests.get(space_url, timeout=5)
|
| 139 |
if response.status_code == 200:
|
| 140 |
-
# Try to find visit count in the HTML (this is a workaround since there's no API)
|
| 141 |
-
import re
|
| 142 |
-
# Look for common patterns that might contain visit counts
|
| 143 |
-
# This is a heuristic approach - HF may change their HTML structure
|
| 144 |
html = response.text
|
| 145 |
-
|
| 146 |
-
#
|
| 147 |
-
#
|
| 148 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 149 |
except Exception as e:
|
| 150 |
-
print(f"Could not
|
| 151 |
|
| 152 |
-
# Return None -
|
| 153 |
return None
|
| 154 |
|
| 155 |
def track_visit():
|
|
@@ -303,9 +342,12 @@ def tagger():
|
|
| 303 |
total_visits = stats_data.get('total_visits', 0)
|
| 304 |
unique_count = len(stats_data['unique_visitors']) if isinstance(stats_data.get('unique_visitors'), set) else len(stats_data.get('unique_visitors', []))
|
| 305 |
countries_count = len(stats_data.get('countries', {}))
|
| 306 |
-
#
|
| 307 |
-
hf_space_visits =
|
| 308 |
-
|
|
|
|
|
|
|
|
|
|
| 309 |
total_visits = 0
|
| 310 |
unique_count = 0
|
| 311 |
countries_count = 0
|
|
@@ -1060,7 +1102,7 @@ if __name__ == "__main__":
|
|
| 1060 |
app.config["FOLDER_SETS"] = []
|
| 1061 |
app.config["DATASET_ERROR"] = error_msg
|
| 1062 |
else:
|
| 1063 |
-
|
| 1064 |
app.config["DATASET_ERROR"] = None
|
| 1065 |
app.config["HEAD"] = 0
|
| 1066 |
app.config["IMAGE_SET_INDEX"] = 0
|
|
|
|
| 131 |
import traceback
|
| 132 |
traceback.print_exc()
|
| 133 |
|
| 134 |
+
def _parse_count(text):
    """Convert a displayed count such as "302", "1,234" or "1.37k" to an int.

    A trailing "k" multiplies by 1,000 and a trailing "m" by 1,000,000
    (case-insensitive); thousands separators are ignored.

    Returns:
        The positive integer count, or None when the text is not a number
        or the resulting count is not greater than zero.
    """
    cleaned = text.strip().lower().replace(',', '')
    multiplier = 1
    if cleaned.endswith('k'):
        multiplier, cleaned = 1000, cleaned[:-1]
    elif cleaned.endswith('m'):
        multiplier, cleaned = 1000000, cleaned[:-1]
    try:
        # round() rather than int() so float noise (e.g. 1369.9999...) does
        # not truncate one below the displayed value.
        count = int(round(float(cleaned) * multiplier))
    except (ValueError, OverflowError):
        return None
    return count if count > 0 else None


def _extract_visits_from_html(html):
    """Return the first positive view/visit count found in *html*, or None.

    This is a heuristic: the patterns are tried in order and the first match
    that parses to a positive number wins, so an unrelated "N views" string
    earlier in the page can shadow the real counter.
    """
    import re

    # A whole number with optional thousands separators, optional decimal
    # part and optional k/m suffix. The lookbehind stops the match from
    # starting in the middle of a number (so "1.37k" is not read as "37k").
    number = r'(?<![\d.])(\d[\d,]*(?:\.\d+)?[km]?)'
    patterns = (
        number + r'\s*views?\b',          # "302 Views" or "1.37k Views"
        number + r'\s*visits?\b',         # "302 Visits"
        r'"views?"\s*:\s*(\d+)',          # JSON: "views": 302
        r'"visits?"\s*:\s*(\d+)',         # JSON: "visits": 302
        r'viewCount["\']?\s*:\s*(\d+)',   # viewCount: 302 / "viewCount": 302
    )
    for pattern in patterns:
        for match in re.findall(pattern, html, re.IGNORECASE):
            count = _parse_count(match)
            if count is not None:
                return count
    return None


def get_hf_space_visits(space_id="0001AMA/auto_object_annotator_0.0.4"):
    """Try to get the HuggingFace Space visit count from the metrics API or the Space page.

    Two best-effort methods are attempted in order:

    1. GET ``https://huggingface.co/api/spaces/{space_id}/metrics`` and look
       for a count under a handful of guessed field names.
       NOTE(review): whether this endpoint exists or needs authentication is
       not established here; any failure simply falls through to method 2.
    2. GET the public Space page and scrape a count out of the HTML.

    Args:
        space_id: Space identifier in ``owner/name`` form.

    Returns:
        The visit count as an int, or None when neither method yields one
        (callers are expected to fall back to the app's own tracking).
        Errors are printed and swallowed; this function does not raise.
    """
    # Method 1: Try the metrics API endpoint (may require auth, but worth trying)
    try:
        metrics_url = f"https://huggingface.co/api/spaces/{space_id}/metrics"
        response = requests.get(metrics_url, timeout=5)
        if response.status_code == 200:
            data = response.json()
            if isinstance(data, dict):
                # Try common field names
                for key in ('views', 'visits', 'total_views', 'total_visits', 'viewCount', 'visitCount'):
                    if key not in data:
                        continue
                    try:
                        return int(data[key])
                    except (TypeError, ValueError):
                        # A null/non-numeric field should not prevent the
                        # remaining candidate keys from being checked.
                        continue
    except Exception as e:
        # Deliberately broad: this is an optional enhancement and must never
        # break the caller.
        print(f"Metrics API failed: {e}")

    # Method 2: Scrape from the Space page HTML
    try:
        space_url = f"https://huggingface.co/spaces/{space_id}"
        response = requests.get(space_url, timeout=5, headers={'User-Agent': 'Mozilla/5.0'})
        if response.status_code == 200:
            count = _extract_visits_from_html(response.text)
            if count is not None:
                return count
    except Exception as e:
        print(f"Could not scrape HF Space visits: {e}")

    # Return None - will use app's own tracking as fallback
    return None
|
| 193 |
|
| 194 |
def track_visit():
|
|
|
|
| 342 |
total_visits = stats_data.get('total_visits', 0)
|
| 343 |
unique_count = len(stats_data['unique_visitors']) if isinstance(stats_data.get('unique_visitors'), set) else len(stats_data.get('unique_visitors', []))
|
| 344 |
countries_count = len(stats_data.get('countries', {}))
|
| 345 |
+
# Try to get actual HF Space visit count, fallback to app's tracking
|
| 346 |
+
hf_space_visits = get_hf_space_visits()
|
| 347 |
+
if hf_space_visits is None:
|
| 348 |
+
hf_space_visits = total_visits # Fallback to app's own tracking
|
| 349 |
+
except Exception as e:
|
| 350 |
+
print(f"Error loading stats: {e}")
|
| 351 |
total_visits = 0
|
| 352 |
unique_count = 0
|
| 353 |
countries_count = 0
|
|
|
|
| 1102 |
app.config["FOLDER_SETS"] = []
|
| 1103 |
app.config["DATASET_ERROR"] = error_msg
|
| 1104 |
else:
|
| 1105 |
+
app.config["FOLDER_SETS"] = folder_sets
|
| 1106 |
app.config["DATASET_ERROR"] = None
|
| 1107 |
app.config["HEAD"] = 0
|
| 1108 |
app.config["IMAGE_SET_INDEX"] = 0
|