Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,5 +1,113 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
import pandas as pd
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
def search_spaces(search_text, category):
|
| 5 |
if category == "All Categories":
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import pandas as pd
|
| 3 |
+
from huggingface_hub import HfApi
|
| 4 |
+
from collections import defaultdict
|
| 5 |
+
|
| 6 |
+
# ------------------------------------------------------
|
| 7 |
+
# Pull Space metadata from the Hugging Face Hub.
api = HfApi()
spaces = api.list_spaces(limit=40000)  # Limiting to 40000 for now

# Flatten each Space into a plain record. The title is the part of the id
# after the last '/', and the author falls back to the part before the
# first '/' when the Hub does not report one.
data = [
    {
        'id': space.id,
        'title': space.id.split('/')[-1],
        'author': space.author or space.id.split('/')[0],
        'likes': space.likes,
        'tags': getattr(space, 'tags', []),
    }
    for space in spaces
]

# One DataFrame row per Space.
df = pd.DataFrame(data)
print("Total spaces collected:", len(df))
print("\nSample of the data:")
print(df.head())
|
| 26 |
+
|
| 27 |
+
# ------------------------------------------------------
|
| 28 |
+
|
| 29 |
+
# Define categories and their keywords
|
| 30 |
+
# Category name -> keywords. A space is assigned the FIRST category (in the
# order written here) that has a keyword occurring as a substring of the
# space's title or of one of its tags, so earlier categories take priority.
# Keywords are compared case-insensitively by categorize_space().
categories = {
    'Text-to-Speech': ['tts', 'speech', 'voice', 'audio', 'kokoro'],
    'Transcription': ['transcribe', 'transcription'],
    # A missing comma previously fused the last two keywords into the single
    # string 'autoGPTagentic', which could never match anything.
    'Agents': ['agent', 'agents', 'smol', 'multi-step', 'autobot', 'autoGPT', 'agentic'],
    'Image Generation': ['stable-diffusion', 'diffusion', 'gan', 'image', 'img2img', 'style', 'art'],
    'Video': ['video', 'animation', 'motion', 'sora'],
    'Face/Portrait': ['face', 'portrait', 'gaze', 'facial'],
    'Chat/LLM': ['chat', 'llm', 'gpt', 'llama', 'text', 'language'],
    '3D': ['3d', 'mesh', 'point-cloud', 'depth'],
    'Audio': ['audio', 'music', 'sound', 'voice'],
    'Vision': ['vision', 'detection', 'recognition', 'classifier'],
    'CLIP': ['image-to-text', 'describe-image'],
    # 'Games' used to be written twice in this literal; the second entry
    # silently replaced the first, dropping 'play' and 'playground'. The two
    # keyword lists are merged here, at the position of the first entry.
    'Games': ['game', 'games', 'play', 'playground', 'Unity', 'UE5', 'Unreal'],
    'Finance': ['finance', 'stock', 'money', 'currency', 'bank', 'market'],
    'SAM': ['sam', 'segmentation', 'mask'],
    'Science': ['science', 'physics', 'chemistry', 'biology', 'math', 'astronomy', 'geology', 'meteorology', 'engineering', 'medicine', 'health', 'nutrition', 'environment', 'ecology', 'geography', 'geophysics'],
    'Education': ['education', 'school', 'university', 'college', 'teaching', 'learning', 'study', 'research'],
    'Graph': ['graph', 'network', 'node', 'edge', 'path', 'tree', 'cycle', 'flow', 'matching', 'coloring', 'swarm'],
    'Research': ['research', 'study', 'experiment', 'paper', 'discovery', 'innovation', 'exploration', 'analysis'],
    # NOTE(review): the key is spelled 'Analyis'; it is kept as-is because it
    # is a runtime label that other code may rely on. 'idefecs' looks like a
    # misspelling of 'idefics', so the latter is added alongside it.
    'Document Analyis': ['pdf', 'RAG', 'idefecs', 'idefics'],
    'WebGPU': ['localModel', 'webGPU'],
    'Point Tracking': ['CoTracker', 'tapir', 'tapnet', 'point', 'track'],
    'Leaderboard': ['arena', 'leaderboard', 'timeline'],
    'Other': []  # Default category
}


def categorize_space(title, tags):
    """Return the name of the first category whose keywords match a space.

    Matching is a case-insensitive substring test: a keyword matches when it
    occurs anywhere inside the title or inside any single tag. Categories are
    tried in the order they appear in ``categories``.

    Args:
        title: Space title; ``None`` or an empty string is treated as no title.
        tags: Iterable of tag strings; ``None`` or empty means no tags.

    Returns:
        The matching category name, or ``'Other'`` when nothing matches.
    """
    title_lower = (title or '').lower()
    tags_lower = [tag.lower() for tag in tags] if tags else []

    for category, keywords in categories.items():
        for keyword in keywords:
            # Titles and tags are lowercased above, so the keyword must be
            # lowercased too; otherwise entries such as 'webGPU' or 'RAG'
            # could never match.
            needle = keyword.lower()
            if needle in title_lower or any(needle in tag for tag in tags_lower):
                return category
    return 'Other'
|
| 68 |
+
|
| 69 |
+
# Label every space with its category (first matching entry in `categories`).
df['category'] = [
    categorize_space(space_title, space_tags)
    for space_title, space_tags in zip(df['title'], df['tags'])
]

# How many spaces landed in each category.
category_counts = df['category'].value_counts()
print("\nCategory Distribution:")
print(category_counts)

# First three rows of each category, in DataFrame order.
print("\nSample spaces from each category:")
for category in categories:
    print(f"\n{category}:")
    sample = df.loc[df['category'] == category].head(3)
    print(sample[['title', 'likes']].to_string())

# ------------------------------------------------------
# Likes summed per category, largest total first.
likes_by_category = df.groupby('category')['likes'].sum()
category_likes = likes_by_category.sort_values(ascending=False)
print("Total likes per category:")
print(category_likes)

print("\nTop 10 spaces in each category (sorted by likes):")
for category in categories:
    print(f"\n=== {category} ===")
    most_liked = df.loc[df['category'] == category].nlargest(10, 'likes')
    top_10 = most_liked[['title', 'likes']]
    # Index column is omitted so only title and likes are printed.
    print(top_10.to_string(index=False))

# ------------------------------------------------------

# Public URL of each space, built from its id.
df['url'] = 'https://huggingface.co/spaces/' + df['id']

# Same ranking as above, limited to five rows and including the link.
print("Top 5 spaces in each category with links:")
for category in categories:
    print(f"\n=== {category} ===")
    most_liked = df.loc[df['category'] == category].nlargest(5, 'likes')
    top_5 = most_liked[['title', 'likes', 'url']]
    # Index column is omitted so only title, likes and url are printed.
    print(top_5.to_string(index=False))
|
| 109 |
+
|
| 110 |
+
# ------------------------------------------------------
|
| 111 |
|
| 112 |
def search_spaces(search_text, category):
|
| 113 |
if category == "All Categories":
|