$P@D$3RV£R committed on
Commit
37e4ae7
·
1 Parent(s): 2ae9356

Implement HF Space visit count fetching from API and HTML scraping

Browse files
Files changed (1) hide show
  1. app.py +60 -18
app.py CHANGED
@@ -131,25 +131,64 @@ def save_stats(stats):
131
  import traceback
132
  traceback.print_exc()
133
 
134
def get_hf_space_visits():
    """Probe the HuggingFace Space page for a visit count.

    This version is a placeholder: it downloads the Space page but performs
    no parsing, so the result is always None and callers are expected to
    rely on the app's own visit tracking instead.

    Returns:
        None, unconditionally.
    """
    page_url = "https://huggingface.co/spaces/0001AMA/auto_object_annotator_0.0.4"
    try:
        reply = requests.get(page_url, timeout=5)
        if reply.status_code == 200:
            # No API is used here; the HTML is fetched as a workaround, but
            # extraction of the count is not implemented yet because any
            # pattern would depend on HF's current page structure.
            import re
            html = reply.text
    except Exception as err:
        # Best-effort: a network failure must not break the caller.
        print(f"Could not fetch HF Space visits: {err}")

    # Nothing was extracted; the caller uses its own tracking as fallback.
    return None
154
 
155
  def track_visit():
@@ -303,9 +342,12 @@ def tagger():
303
  total_visits = stats_data.get('total_visits', 0)
304
  unique_count = len(stats_data['unique_visitors']) if isinstance(stats_data.get('unique_visitors'), set) else len(stats_data.get('unique_visitors', []))
305
  countries_count = len(stats_data.get('countries', {}))
306
- # Total Cumulative Visits = app's own tracking (cumulative since app started)
307
- hf_space_visits = total_visits
308
- except:
 
 
 
309
  total_visits = 0
310
  unique_count = 0
311
  countries_count = 0
@@ -1060,7 +1102,7 @@ if __name__ == "__main__":
1060
  app.config["FOLDER_SETS"] = []
1061
  app.config["DATASET_ERROR"] = error_msg
1062
  else:
1063
- app.config["FOLDER_SETS"] = folder_sets
1064
  app.config["DATASET_ERROR"] = None
1065
  app.config["HEAD"] = 0
1066
  app.config["IMAGE_SET_INDEX"] = 0
 
131
  import traceback
132
  traceback.print_exc()
133
 
134
def _parse_hf_count(raw):
    """Convert a count such as 302, "1,234", "1.37k" or "2M" to an int.

    Returns None when the value cannot be interpreted as a number.
    """
    text = str(raw).strip().lower().replace(',', '')
    multiplier = 1
    if text.endswith('k'):
        multiplier, text = 1000, text[:-1]
    elif text.endswith('m'):
        multiplier, text = 1000000, text[:-1]

    # Plain integers take the exact path; only abbreviated/decimal values
    # go through float.
    try:
        return int(text) * multiplier
    except ValueError:
        pass
    try:
        # round() rather than int(): float products such as 4.35 * 100 can
        # land just below the intended integer and would be truncated.
        return round(float(text) * multiplier)
    except (ValueError, OverflowError):
        return None


def get_hf_space_visits(space_id="0001AMA/auto_object_annotator_0.0.4", http_get=None):
    """Try to get the HuggingFace Space visit count from the metrics API or the Space page.

    Two best-effort strategies are tried in order:

    1. GET ``https://huggingface.co/api/spaces/{space_id}/metrics`` and look
       for a count under a handful of plausible field names.
       NOTE(review): this endpoint and its field names are guesses, not a
       documented contract - confirm against the HF Hub API.
    2. GET the public Space page and search the HTML for text such as
       "302 Views", "1.37k Visits" or embedded JSON like ``"views": 302``.
       This is a heuristic and depends on HF's current page markup.

    Args:
        space_id: ``owner/name`` identifier of the Space.
        http_get: Optional callable with the same calling convention as
            ``requests.get`` (``http_get(url, **kwargs)`` returning an object
            with ``status_code``, ``text`` and ``json()``). Defaults to
            ``requests.get``.

    Returns:
        The visit count as an int, or None when neither strategy produced a
        usable number (the caller falls back to the app's own tracking).
        Network and parsing errors are printed and never raised.
    """
    import re

    # Method 1: Try the metrics API endpoint (may require auth, but worth trying)
    try:
        fetch = requests.get if http_get is None else http_get
        metrics_url = f"https://huggingface.co/api/spaces/{space_id}/metrics"
        response = fetch(metrics_url, timeout=5)
        if response.status_code == 200:
            data = response.json()
            if isinstance(data, dict):
                # Try common field names; an unparseable value under one key
                # should not prevent trying the remaining keys.
                for key in ('views', 'visits', 'total_views', 'total_visits', 'viewCount', 'visitCount'):
                    if key in data:
                        count = _parse_hf_count(data[key])
                        if count is not None:
                            return count
    except Exception as e:
        print(f"Metrics API failed: {e}")

    # Method 2: Scrape from the Space page HTML
    try:
        fetch = requests.get if http_get is None else http_get
        space_url = f"https://huggingface.co/spaces/{space_id}"
        response = fetch(space_url, timeout=5, headers={'User-Agent': 'Mozilla/5.0'})
        if response.status_code == 200:
            html = response.text

            # A number with optional thousands separators, optional decimal
            # part and optional k/m suffix. The lookbehind stops a match from
            # starting in the middle of a token (e.g. the "37k" of "1.37k").
            number = r'(?<![\w.,])(\d[\d,]*(?:\.\d+)?[km]?)'
            patterns = [
                number + r'\s*views?\b',           # "302 Views" or "1.37k Views"
                number + r'\s*visits?\b',          # "302 Visits"
                r'"views?"\s*:\s*(\d+)',           # JSON: "views": 302
                r'"visits?"\s*:\s*(\d+)',          # JSON: "visits": 302
                r'viewCount["\']?\s*:\s*(\d+)',    # viewCount: 302 / "viewCount": 302
            ]

            for pattern in patterns:
                # Take the first match that parses to a positive number.
                for match in re.finditer(pattern, html, re.IGNORECASE):
                    count = _parse_hf_count(match.group(1))
                    if count is not None and count > 0:
                        return count
    except Exception as e:
        print(f"Could not scrape HF Space visits: {e}")

    # Return None - will use app's own tracking as fallback
    return None
193
 
194
  def track_visit():
 
342
  total_visits = stats_data.get('total_visits', 0)
343
  unique_count = len(stats_data['unique_visitors']) if isinstance(stats_data.get('unique_visitors'), set) else len(stats_data.get('unique_visitors', []))
344
  countries_count = len(stats_data.get('countries', {}))
345
+ # Try to get actual HF Space visit count, fallback to app's tracking
346
+ hf_space_visits = get_hf_space_visits()
347
+ if hf_space_visits is None:
348
+ hf_space_visits = total_visits # Fallback to app's own tracking
349
+ except Exception as e:
350
+ print(f"Error loading stats: {e}")
351
  total_visits = 0
352
  unique_count = 0
353
  countries_count = 0
 
1102
  app.config["FOLDER_SETS"] = []
1103
  app.config["DATASET_ERROR"] = error_msg
1104
  else:
1105
+ app.config["FOLDER_SETS"] = folder_sets
1106
  app.config["DATASET_ERROR"] = None
1107
  app.config["HEAD"] = 0
1108
  app.config["IMAGE_SET_INDEX"] = 0