ginipick committed on
Commit
3832097
·
verified ·
1 Parent(s): 5e373c3

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -391
app.py DELETED
@@ -1,391 +0,0 @@
1
- import gradio as gr
2
- import pandas as pd
3
- import plotly.express as px
4
- from datetime import datetime, timedelta
5
- import requests
6
- from io import BytesIO
7
- import traceback
8
-
9
- ######################################
10
- # 1) 데이터 로드 & 30일치 랭킹 산출
11
- ######################################
12
def load_and_process_data(
    days=30,
    top_n=1000,
    url="https://huggingface.co/datasets/cfahlgren1/hub-stats/resolve/main/spaces.parquet",
    timeout=60,
):
    """
    Download the spaces snapshot and build per-day rankings for recent spaces.

    Only spaces created within the last `days` days are kept. For every day in
    that window, the spaces created on or before that day are ranked by
    `trendingScore` (descending, ties broken by ascending `id`). The same
    `trendingScore` column is used for every day, so the daily ranks differ
    only in which spaces already existed on that day.

    Args:
        days: Size of the look-back window in days.
        top_n: Rank cutoff applied to the most recent day.
        url: Location of the parquet file to download.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A tuple `(daily_ranks_df, top_spaces_df)`. `daily_ranks_df` has the
        columns `id`, `date`, `rank`, `trendingScore` and `createdAt`;
        `top_spaces_df` holds the rows of the latest date with
        `rank <= top_n`, sorted by rank. On any failure the error is printed
        and two empty DataFrames are returned.
    """
    try:
        # Fail fast on a hung connection or an HTTP error instead of handing
        # an error page to the parquet reader.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        df = pd.read_parquet(BytesIO(response.content))

        # Normalise everything to timezone-aware UTC so the comparison below
        # works whether `createdAt` is stored naive or tz-aware.
        # NOTE(review): naive values are interpreted as UTC - confirm against
        # the dataset.
        now = pd.Timestamp.now(tz='UTC')
        window_start = now - pd.Timedelta(days=days)
        df['createdAt'] = pd.to_datetime(df['createdAt'], utc=True)
        df = df[df['createdAt'] >= window_start]

        # Sort once; filtering a sorted frame keeps the order, so each day's
        # rank is simply the row position within that day's subset.
        df = df.sort_values(['trendingScore', 'id'], ascending=[False, True])
        created_dates = df['createdAt'].dt.date

        columns = ['id', 'date', 'rank', 'trendingScore', 'createdAt']
        daily_ranks = []
        for day in pd.date_range(start=window_start, end=now, freq='D'):
            current_date = day.date()
            snapshot = df.loc[
                created_dates <= current_date,
                ['id', 'trendingScore', 'createdAt'],
            ].copy()
            snapshot['rank'] = range(1, len(snapshot) + 1)
            snapshot['date'] = current_date
            daily_ranks.append(snapshot[columns])

        daily_ranks_df = pd.concat(daily_ranks, ignore_index=True)

        # Keep only the best `top_n` ranks of the most recent day.
        latest_date = daily_ranks_df['date'].max()
        is_latest_top = (
            (daily_ranks_df['date'] == latest_date)
            & (daily_ranks_df['rank'] <= top_n)
        )
        top_spaces = daily_ranks_df[is_latest_top].sort_values('rank').copy()

        return daily_ranks_df, top_spaces
    except Exception as e:
        # Deliberate best-effort boundary: the UI is built from empty frames.
        print(f"Error loading data: {e}")
        traceback.print_exc()
        return pd.DataFrame(), pd.DataFrame()
52
-
53
- ######################################
54
- # 2) 중복 ID(2개 이상) 합산 -> 상위 20
55
- ######################################
56
def get_top20_multiple_ids(top_n_spaces_df, top_k=20):
    """
    Sum `trendingScore` for ids that occur more than once and return the top.

    Args:
        top_n_spaces_df: DataFrame with at least `id` and `trendingScore`
            columns (for example the top-1000 frame of the latest day).
        top_k: Maximum number of ids to return.

    Returns:
        A DataFrame with the columns `id` and `total_score`, sorted by
        `total_score` descending and limited to `top_k` rows. Ties keep
        ascending `id` order. The frame is empty (but has the same columns)
        when the input is empty, no id repeats, or an error occurs.
    """
    # Same schema for every "nothing to show" outcome.
    no_result = pd.DataFrame(columns=['id', 'total_score'])

    if top_n_spaces_df.empty:
        return no_result

    try:
        # Rows whose id appears at least twice.
        repeated = top_n_spaces_df[top_n_spaces_df['id'].duplicated(keep=False)]
        if repeated.empty:
            return no_result

        totals = (
            repeated
            .groupby('id')['trendingScore']
            .sum()
            .reset_index()
            .rename(columns={'trendingScore': 'total_score'})
        )

        # A stable sort keeps the result deterministic when totals tie.
        totals = totals.sort_values(
            by='total_score', ascending=False, kind='stable'
        )
        return totals.head(top_k)
    except Exception as e:
        print(f"Error in get_top20_multiple_ids: {e}")
        traceback.print_exc()
        return no_result
93
-
94
- ######################################
95
- # 3) 막대 차트 생성 (상위 20개)
96
- ######################################
97
def create_score_chart(multiple_ids_df):
    """
    Build the horizontal bar chart of summed trending scores.

    `multiple_ids_df` is expected to carry the columns `id` and
    `total_score`. An empty frame produces a one-bar placeholder figure.
    If anything raises, the error is printed and None is returned.
    """
    # Layout settings used by both the placeholder and the real chart.
    shared_layout = dict(
        xaxis_title="Total Trending Score",
        yaxis_title="Space ID",
        plot_bgcolor='white',
        paper_bgcolor='white',
        showlegend=False,
        margin=dict(l=200, r=20, t=40, b=40),
    )

    try:
        if multiple_ids_df.empty:
            # No repeated ids (or something went wrong upstream).
            placeholder = px.bar(
                pd.DataFrame({"id": ["No multiple entries"], "total_score": [0]}),
                x="total_score",
                y="id",
                orientation='h',
            )
            placeholder.update_layout(
                title="No multiple entries found (in Top 1000)",
                **shared_layout,
            )
            return placeholder

        bar_labels = [f"{score:.2f}" for score in multiple_ids_df['total_score']]
        chart = px.bar(
            multiple_ids_df,
            y='id',
            x='total_score',
            orientation='h',
            title="Top 20 IDs with Multiple Entries (Rank ≤ 1000)",
            text=bar_labels,
            height=500,
        )

        # Order the bars by their total score.
        chart.update_layout(
            yaxis={'categoryorder': 'total ascending'},
            **shared_layout,
        )
        chart.update_traces(
            marker_color='#4CAF50',
            textposition='outside',
            textfont=dict(size=12),
        )
        chart.update_xaxes(showgrid=True, gridwidth=1, gridcolor='lightgray')

        return chart
    except Exception as e:
        print(f"Error creating score chart: {e}")
        traceback.print_exc()
        return None
156
-
157
- ######################################
158
- # 4) 스페이스 상세/트렌드 차트
159
- ######################################
160
def create_trend_chart(space_id, daily_ranks_df):
    """
    Draw the daily rank history of one space as a line chart.

    Args:
        space_id: Id of the space to plot.
        daily_ranks_df: Frame with at least `id`, `date` and `rank` columns.

    Returns:
        A Plotly figure, or None when `space_id` is None, the frame is
        empty, the id has no rows, or an error occurs (the error is printed).
    """
    try:
        if space_id is None or daily_ranks_df.empty:
            return None

        space_data = daily_ranks_df[daily_ranks_df['id'] == space_id].copy()
        if space_data.empty:
            return None

        space_data = space_data.sort_values('date')

        worst_rank = int(space_data['rank'].max())
        # Aim for roughly ten ticks. A fixed step of 10 would draw hundreds
        # or thousands of ticks once ranks run into the thousands.
        tick_step = max(1, worst_rank // 10)

        fig = px.line(
            space_data,
            x='date',
            y='rank',
            title=f'Daily Rank Trend for {space_id}',
            labels={'date': 'Date', 'rank': 'Rank'},
            markers=True,
            height=400
        )

        # Rank 1 is the best, so the y axis is reversed: the worst rank sits
        # at the bottom and rank 1 at the top.
        fig.update_layout(
            xaxis_title="Date",
            yaxis_title="Rank",
            yaxis=dict(
                range=[worst_rank + 1, 1],
                tickmode='linear',
                tick0=1,
                dtick=tick_step
            ),
            hovermode='x unified',
            plot_bgcolor='white',
            paper_bgcolor='white',
            showlegend=False,
            margin=dict(t=50, r=20, b=40, l=40)
        )

        fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='lightgray')
        fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='lightgray')

        fig.update_traces(
            line_color='#2563eb',
            line_width=2,
            marker=dict(size=8, color='#2563eb')
        )

        return fig
    except Exception as e:
        print(f"Error creating trend chart: {e}")
        traceback.print_exc()
        return None
216
-
217
def update_display(selection):
    """
    Refresh the trend chart and the detail card for the selected space.

    Reads the module-level `daily_ranks_df`.

    Args:
        selection: Id of the selected space, or a falsy value when nothing
            is selected.

    Returns:
        A tuple `(figure_or_None, gr.HTML)`: the rank trend chart for the
        space and an HTML card with its latest rank, trending score and
        creation date. When nothing is selected, the id is unknown, or an
        error occurs, the figure is None and the HTML carries a message.
    """
    # Function-scope imports keep this block self-contained.
    from html import escape
    from urllib.parse import quote

    if not selection:
        return None, gr.HTML(value="<div style='text-align: center; padding: 20px; color: #666;'>Select a space to view details</div>")

    try:
        space_id = selection
        # The id comes from an external dataset; escape it before it is
        # embedded in markup.
        safe_id = escape(str(space_id))

        history = daily_ranks_df[daily_ranks_df['id'] == space_id]
        if history.empty:
            # Explicit message instead of an IndexError from `.iloc[-1]`.
            return None, gr.HTML(value=f"<div style='color: red;'>No ranking data found for {safe_id}</div>")

        # Row of the most recent date for this id.
        latest_data = history.sort_values('date').iloc[-1]
        space_url = escape(
            f"https://huggingface.co/spaces/{quote(str(space_id), safe='/')}"
        )

        info_text = f"""
        <div style="padding: 16px; background-color: white; border-radius: 8px; box-shadow: 0 1px 3px rgba(0,0,0,0.1);">
            <h3 style="margin: 0 0 12px 0;">Space Details</h3>
            <p style="margin: 4px 0;"><strong>ID:</strong> {safe_id}</p>
            <p style="margin: 4px 0;"><strong>Current Rank:</strong> {int(latest_data['rank'])}</p>
            <p style="margin: 4px 0;"><strong>Trending Score:</strong> {latest_data['trendingScore']:.2f}</p>
            <p style="margin: 4px 0;"><strong>Created At:</strong> {latest_data['createdAt'].strftime('%Y-%m-%d')}</p>
            <p style="margin: 12px 0 0 0;">
                <a href="{space_url}"
                   target="_blank"
                   style="color: #2563eb; text-decoration: none;">
                    View Space ↗
                </a>
            </p>
        </div>
        """

        chart = create_trend_chart(space_id, daily_ranks_df)

        return chart, gr.HTML(value=info_text)

    except Exception as e:
        print(f"Error in update_display: {e}")
        traceback.print_exc()
        return None, gr.HTML(value=f"<div style='color: red;'>Error processing data: {escape(str(e))}</div>")
260
-
261
- ######################################
262
- # 메인
263
- ######################################
264
# Script-level imports used only by the page-building code below.
import html
import json
from urllib.parse import quote

print("Loading initial data...")
daily_ranks_df, top_n_spaces = load_and_process_data()  # latest day, rank <= 1000
# The loader returns empty frames on failure, so report what actually happened.
if daily_ranks_df.empty:
    print("No data could be loaded; the dashboard will be empty.")
else:
    print("Data loaded successfully!")

# Sum the scores of ids that appear at least twice and keep the top 20.
multiple_ids_df = get_top20_multiple_ids(top_n_spaces)
score_chart = create_score_chart(multiple_ids_df)

# One pass over the frame; an empty frame yields an empty list.
top_space_records = top_n_spaces.to_dict('records')


def _render_space_card(space):
    """Return the HTML card for one ranked space (`id`, `rank`, `trendingScore`)."""
    raw_id = str(space['id'])
    rank = space['rank']
    # Ids come from an external dataset: escape them for HTML text and
    # attributes, and pass them to the inline handler as a JSON string literal.
    safe_id = html.escape(raw_id)
    space_url = html.escape(
        f"https://huggingface.co/spaces/{quote(raw_id, safe='/')}"
    )
    js_id = html.escape(json.dumps(raw_id))
    # Better ranks get a more saturated, slightly darker blue background.
    saturation = max(30, 90 - (rank / 1000 * 60))
    lightness = min(97, 85 + (rank / 1000 * 10))
    return f"""
        <div class="space-card"
             data-space-id="{safe_id}"
             style="
                border: 1px solid #e5e7eb;
                border-radius: 8px;
                padding: 16px;
                margin: 8px;
                background-color: hsl(210, {saturation}%, {lightness}%);
                box-shadow: 0 1px 3px rgba(0,0,0,0.1);
                display: inline-block;
                width: 250px;
                vertical-align: top;
                cursor: pointer;
                transition: all 0.2s;
             "
             onmouseover="this.style.transform='translateY(-2px)';this.style.boxShadow='0 4px 6px rgba(0,0,0,0.1)';"
             onmouseout="this.style.transform='none';this.style.boxShadow='0 1px 3px rgba(0,0,0,0.1)';"
        >
            <div style="font-size: 1.2em; font-weight: bold; margin-bottom: 8px;">
                #{int(rank)}
            </div>
            <div style="margin-bottom: 8px;">
                {safe_id}
            </div>
            <div style="color: #666; margin-bottom: 12px;">
                Score: {space['trendingScore']:.2f}
            </div>
            <div style="display: flex; gap: 8px;">
                <a href="{space_url}"
                   target="_blank"
                   style="padding: 6px 12px; background-color: white; color: #2563eb; text-decoration: none; border-radius: 4px; font-size: 0.9em; border: 1px solid #2563eb;"
                   onclick="event.stopPropagation();">
                    View Space ↗
                </a>
                <button onclick="event.preventDefault(); gradioEvent({js_id});"
                        style="padding: 6px 12px; background-color: #2563eb; color: white; border: none; border-radius: 4px; cursor: pointer; font-size: 0.9em;">
                    View Trend
                </button>
            </div>
        </div>
        """


# Card grid in rank order, followed by the script that forwards a click on
# "View Trend" to the hidden radio input with the matching value.
# NOTE(review): browsers do not run <script> elements inserted via innerHTML;
# confirm that the Gradio version in use actually executes this script.
html_content = """
    <div style='display: flex; flex-wrap: wrap; gap: 16px; justify-content: center;'>
    """ + "".join(
    _render_space_card(space) for space in top_space_records
) + """
    </div>
    <script>
    function gradioEvent(spaceId) {
        const radio = document.querySelector(`input[type="radio"][value="${spaceId}"]`);
        if (radio) {
            radio.checked = true;
            const event = new Event('change');
            radio.dispatchEvent(event);
        }
    }
    </script>
    """

# Gradio interface
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # HF Space Ranking Tracker

    **Note**: 이 데모는 실제 'Top 1000'을 대상으로 중복된 ID(2개 이상)를 찾아 합산 스코어를 표시합니다.
    만약 데이터에 중복 ID가 없다면(또는 극히 적다면), 우측 막대 차트가 'No multiple entries found'일 수 있습니다.
    """)

    with gr.Tabs():
        with gr.Tab("Dashboard"):
            with gr.Row(variant="panel"):
                with gr.Column(scale=7):
                    trend_plot = gr.Plot(
                        label="Daily Rank Trend",
                        container=True
                    )
                with gr.Column(scale=3):
                    score_plot = gr.Plot(
                        value=score_chart,
                        label="Multiple-Entry IDs (Top 20)",
                        container=True
                    )

            with gr.Row():
                info_box = gr.HTML(
                    value="<div style='text-align: center; padding: 20px; color: #666;'>Select a space to view details</div>"
                )

            # Hidden radio holding one choice per top-ranked space; it is the
            # input of the change event wired up below.
            space_selection = gr.Radio(
                choices=[space['id'] for space in top_space_records],
                value=None,
                visible=False
            )

            # HTML cards, shown in rank order.
            with gr.Row():
                space_grid = gr.HTML(value=html_content)

        with gr.Tab("About"):
            gr.Markdown("""
            ### Why might the chart be empty?
            - 이 데모는 Top 1000 안에서 **동일한 `id`가 2번 이상** 등장하는 경우에만 점수를 합산해 막대차트를 그립니다.
            - 데이터셋 상 실제로 중복된 `id`가 많지 않다면 차트가 비어있을 수 있습니다.

            ### What can you do?
            - (A) 코드를 수정해 Top 100 → 1000, 5000 등으로 늘려보거나,
            - (B) 아예 rank 제한 없이 전체 데이터에서 중복 여부를 확인할 수도 있습니다.
            - (C) 테스트용으로 가짜 중복 데이터를 만들어도 됩니다.
            """)

    space_selection.change(
        fn=update_display,
        inputs=[space_selection],
        outputs=[trend_plot, info_box]
    )

if __name__ == "__main__":
    demo.launch(share=True)