Spaces:

fantos
/

Ranking-Tracker

Running

App Files Files Community

ginipick committed on Jan 27, 2025

Commit

3832097

verified ·

1 Parent(s): 5e373c3

Delete app.py

Browse files

Files changed (1) hide show

app.py +0 -391

app.py DELETED Viewed

@@ -1,391 +0,0 @@
-import gradio as gr
-import pandas as pd
-import plotly.express as px
-from datetime import datetime, timedelta
-import requests
-from io import BytesIO
-import traceback
-######################################
-# 1) Load data & compute 30 days of rankings
-######################################
-def load_and_process_data():
-    """
-    Download the Hub spaces statistics and build a per-day ranking table.
-    Only spaces created within the last 30 days are kept. For every day in
-    that window, the spaces created on or before that day are ranked by
-    'trendingScore' (descending), ties broken by 'id' (ascending). The same
-    downloaded 'trendingScore' values are used for every day; only the set
-    of spaces that already existed changes from day to day.
-    Returns:
-        (daily_ranks_df, top_1000_spaces): the long-format table with the
-        columns id, date, rank, trendingScore, createdAt, and the rows of the
-        latest date whose rank is <= 1000, sorted by rank. On any failure the
-        error is printed and two empty DataFrames are returned.
-    """
-    try:
-        url = "https://huggingface.co/datasets/cfahlgren1/hub-stats/resolve/main/spaces.parquet"
-        # A timeout keeps start-up from hanging forever on a stalled
-        # connection; raise_for_status reports an HTTP error directly instead
-        # of letting an error page fail later inside the parquet reader.
-        response = requests.get(url, timeout=60)
-        response.raise_for_status()
-        df = pd.read_parquet(BytesIO(response.content))
-        # Sample "now" once so the filter and the date range share one window
-        now = datetime.now()
-        thirty_days_ago = now - timedelta(days=30)
-        df['createdAt'] = pd.to_datetime(df['createdAt'])
-        # A timezone-aware column cannot be compared with the naive cutoff
-        # below, so convert it to naive UTC first (no-op for naive columns).
-        if df['createdAt'].dt.tz is not None:
-            df['createdAt'] = df['createdAt'].dt.tz_convert(None)
-        # Keep only spaces created within the last 30 days
-        df = df[df['createdAt'] >= thirty_days_ago].copy()
-        # Loop-invariant: calendar date on which each space was created
-        created_dates = df['createdAt'].dt.date
-        # Walk over every day of the 30-day window
-        dates = pd.date_range(start=thirty_days_ago, end=now, freq='D')
-        daily_ranks = []
-        for date in dates:
-            current_day = date.date()
-            # Only spaces that already existed on this day
-            date_data = df[created_dates <= current_day].copy()
-            # Highest trendingScore first, id ascending as tie-breaker, then assign ranks
-            date_data = date_data.sort_values(['trendingScore', 'id'], ascending=[False, True])
-            date_data['rank'] = range(1, len(date_data) + 1)
-            date_data['date'] = current_day
-            daily_ranks.append(
-                date_data[['id', 'date', 'rank', 'trendingScore', 'createdAt']]
-            )
-        daily_ranks_df = pd.concat(daily_ranks, ignore_index=True)
-        # Rows of the most recent date with rank <= 1000
-        latest_date = daily_ranks_df['date'].max()
-        top_1000_spaces = daily_ranks_df[
-            (daily_ranks_df['date'] == latest_date) &
-            (daily_ranks_df['rank'] <= 1000)
-        ].sort_values('rank').copy()
-        return daily_ranks_df, top_1000_spaces
-    except Exception as e:
-        # Deliberate best-effort: callers receive empty frames, not an exception
-        print(f"Error loading data: {e}")
-        traceback.print_exc()
-        return pd.DataFrame(), pd.DataFrame()
-######################################
-# 2) Sum scores of repeated IDs (2+ rows) -> top 20
-######################################
-def get_top20_multiple_ids(top_n_spaces_df):
-    """
-    Sum 'trendingScore' for every id that occurs at least twice in the given
-    frame (e.g. top_1000_spaces) and return the 20 ids with the largest
-    summed score as a frame with the columns [id, total_score].
-    An empty DataFrame is returned when the input is empty, when no id is
-    repeated, or when an error occurs (the error is printed).
-    """
-    if top_n_spaces_df.empty:
-        return pd.DataFrame()
-    try:
-        # Number of rows per id; keep the ids seen two or more times
-        occurrences = top_n_spaces_df['id'].value_counts()
-        repeated_ids = occurrences[occurrences >= 2].index
-        if not len(repeated_ids):
-            # No id is repeated at all
-            return pd.DataFrame()
-        # Rows that belong to a repeated id
-        is_repeated = top_n_spaces_df['id'].isin(repeated_ids)
-        repeated_rows = top_n_spaces_df[is_repeated].copy()
-        # One row per id holding the summed score
-        totals = repeated_rows.groupby('id', as_index=False)['trendingScore'].sum()
-        totals = totals.rename(columns={'trendingScore': 'total_score'})
-        # Largest totals first, limited to 20 rows
-        return totals.sort_values(by='total_score', ascending=False).head(20)
-    except Exception as e:
-        print(f"Error in get_top20_multiple_ids: {e}")
-        traceback.print_exc()
-        return pd.DataFrame()
-######################################
-# 3) Build the bar chart (top 20)
-######################################
-def create_score_chart(multiple_ids_df):
-    """
-    Draw a horizontal bar chart of the summed trending score per space id.
-    multiple_ids_df is expected to hold the columns [id, total_score]. When it
-    is empty, a single-bar placeholder figure is returned instead of the real
-    chart. If building the figure raises, the error is printed and None is
-    returned.
-    """
-    try:
-        # Layout settings shared by the placeholder and the real chart
-        base_layout = dict(
-            xaxis_title="Total Trending Score",
-            yaxis_title="Space ID",
-            plot_bgcolor='white',
-            paper_bgcolor='white',
-            showlegend=False,
-            margin=dict(l=200, r=20, t=40, b=40),
-        )
-        if multiple_ids_df.empty:
-            # No repeated id at all (or something went wrong upstream)
-            empty_fig = px.bar(
-                pd.DataFrame({"id": ["No multiple entries"], "total_score": [0]}),
-                x="total_score",
-                y="id",
-                orientation='h'
-            )
-            empty_fig.update_layout(
-                title="No multiple entries found (in Top 1000)",
-                **base_layout
-            )
-            return empty_fig
-        # Bar labels: each total formatted with two decimals
-        bar_labels = [f"{score:.2f}" for score in multiple_ids_df['total_score']]
-        chart = px.bar(
-            multiple_ids_df,
-            y='id',
-            x='total_score',
-            orientation='h',
-            title="Top 20 IDs with Multiple Entries (Rank ≤ 1000)",
-            text=bar_labels,
-            height=500
-        )
-        chart.update_layout(
-            **base_layout,
-            # Order the categories by bar total so the largest score ends up on top
-            yaxis={'categoryorder': 'total ascending'}
-        )
-        chart.update_traces(
-            marker_color='#4CAF50',
-            textposition='outside',
-            textfont=dict(size=12)
-        )
-        chart.update_xaxes(showgrid=True, gridwidth=1, gridcolor='lightgray')
-        return chart
-    except Exception as e:
-        print(f"Error creating score chart: {e}")
-        traceback.print_exc()
-        return None
-######################################
-# 4) Space details / trend chart
-######################################
-def create_trend_chart(space_id, daily_ranks_df):
-    """
-    Plot the rank of one space over the dates present in daily_ranks_df as a
-    line chart with markers.
-    Returns None when space_id is None, when daily_ranks_df is empty, when the
-    id has no rows, or when building the figure raises (the error is printed).
-    """
-    try:
-        if space_id is None:
-            return None
-        if daily_ranks_df.empty:
-            return None
-        # Rows of the requested id, in chronological order
-        history = daily_ranks_df[daily_ranks_df['id'] == space_id].copy()
-        if history.empty:
-            return None
-        history = history.sort_values('date')
-        worst_rank = history['rank'].max()
-        trend_fig = px.line(
-            history,
-            x='date',
-            y='rank',
-            title=f'Daily Rank Trend for {space_id}',
-            labels={'date': 'Date', 'rank': 'Rank'},
-            markers=True,
-            height=400
-        )
-        # Rank 1 is the best position, so the y-axis range is given in
-        # descending order to draw it at the top of the plot.
-        rank_axis = dict(
-            range=[worst_rank+1, 1],
-            tickmode='linear',
-            tick0=1,
-            dtick=10
-        )
-        trend_fig.update_layout(
-            xaxis_title="Date",
-            yaxis_title="Rank",
-            yaxis=rank_axis,
-            hovermode='x unified',
-            plot_bgcolor='white',
-            paper_bgcolor='white',
-            showlegend=False,
-            margin=dict(t=50, r=20, b=40, l=40)
-        )
-        # Same light grid on both axes
-        grid_style = dict(showgrid=True, gridwidth=1, gridcolor='lightgray')
-        trend_fig.update_xaxes(**grid_style)
-        trend_fig.update_yaxes(**grid_style)
-        trend_fig.update_traces(
-            line=dict(color='#2563eb', width=2),
-            marker=dict(size=8, color='#2563eb')
-        )
-        return trend_fig
-    except Exception as e:
-        print(f"Error creating trend chart: {e}")
-        traceback.print_exc()
-        return None
-def update_display(selection):
-    """
-    Build the outputs shown when the user selects a space id:
-    1) a trend chart of that id's daily rank
-    2) an HTML card with the details of its most recent ranking row
-    Reads the module-level daily_ranks_df (read-only, so no 'global'
-    statement is required).
-    Args:
-        selection: the selected space id; a falsy value means "nothing selected".
-    Returns:
-        (chart, gr.HTML). chart is None when nothing is selected, when the id
-        has no ranking rows, or when an error occurs; the HTML then holds a
-        hint or an error message instead of the details card.
-    """
-    import html  # local import: not part of the file's import block
-    if not selection:
-        return None, gr.HTML(value="<div style='text-align: center; padding: 20px; color: #666;'>Select a space to view details</div>")
-    try:
-        space_id = selection
-        # Escape once; the id is interpolated into HTML text and an href attribute
-        safe_id = html.escape(str(space_id), quote=True)
-        space_rows = daily_ranks_df[daily_ranks_df['id'] == space_id]
-        if space_rows.empty:
-            # Report an unknown id explicitly instead of failing on .iloc[-1]
-            return None, gr.HTML(value=f"<div style='color: red;'>No ranking data found for: {safe_id}</div>")
-        # Most recent row (last date) for this id
-        latest_data = space_rows.sort_values('date').iloc[-1]
-        info_text = f"""
-        <div style="padding: 16px; background-color: white; border-radius: 8px; box-shadow: 0 1px 3px rgba(0,0,0,0.1);">
-            <h3 style="margin: 0 0 12px 0;">Space Details</h3>
-            <p style="margin: 4px 0;"><strong>ID:</strong> {safe_id}</p>
-            <p style="margin: 4px 0;"><strong>Current Rank:</strong> {int(latest_data['rank'])}</p>
-            <p style="margin: 4px 0;"><strong>Trending Score:</strong> {latest_data['trendingScore']:.2f}</p>
-            <p style="margin: 4px 0;"><strong>Created At:</strong> {latest_data['createdAt'].strftime('%Y-%m-%d')}</p>
-            <p style="margin: 12px 0 0 0;">
-                <a href="https://huggingface.co/spaces/{safe_id}"
-                   target="_blank"
-                   style="color: #2563eb; text-decoration: none;">
-                    View Space ↗
-                </a>
-            </p>
-        </div>
-        """
-        chart = create_trend_chart(space_id, daily_ranks_df)
-        return chart, gr.HTML(value=info_text)
-    except Exception as e:
-        print(f"Error in update_display: {e}")
-        # Same diagnostics as the other helpers in this file
-        traceback.print_exc()
-        return None, gr.HTML(value=f"<div style='color: red;'>Error processing data: {html.escape(str(e))}</div>")
-######################################
-# Main
-######################################
-# Module-level start-up: load the data once, pre-compute the bar chart and
-# build the Gradio UI. Everything below runs at import time.
-print("Loading initial data...")
-daily_ranks_df, top_n_spaces = load_and_process_data()  # n=1000 here
-# NOTE: this message is printed even when loading failed, because
-# load_and_process_data returns empty frames instead of raising.
-print("Data loaded successfully!")
-# Aggregate only the IDs that appear two or more times -> top 20
-multiple_ids_df = get_top20_multiple_ids(top_n_spaces)
-score_chart = create_score_chart(multiple_ids_df)
-# Gradio interface
-with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("""
-    # HF Space Ranking Tracker
-    **Note**: 이 데모는 실제 'Top 1000'을 대상으로 중복된 ID(2개 이상)를 찾아 합산 스코어를 표시합니다.
-    만약 데이터에 중복 ID가 없다면(또는 극히 적다면), 우측 막대 차트가 'No multiple entries found'일 수 있습니다.
-    """)
-    with gr.Tabs():
-        with gr.Tab("Dashboard"):
-            # Top row: trend chart of the selected space (left) and the
-            # pre-computed multiple-entry bar chart (right)
-            with gr.Row(variant="panel"):
-                with gr.Column(scale=7):
-                    trend_plot = gr.Plot(
-                        label="Daily Rank Trend",
-                        container=True
-                    )
-                with gr.Column(scale=3):
-                    score_plot = gr.Plot(
-                        value=score_chart,
-                        label="Multiple-Entry IDs (Top 20)",
-                        container=True
-                    )
-            with gr.Row():
-                info_box = gr.HTML(
-                    value="<div style='text-align: center; padding: 20px; color: #666;'>Select a space to view details</div>"
-                )
-            # Hidden radio group (top n=1000); the card buttons below try to
-            # select one of its options from JavaScript to trigger update_display
-            space_selection = gr.Radio(
-                choices=[row['id'] for _, row in top_n_spaces.iterrows()],
-                value=None,
-                visible=False
-            )
-            # HTML cards (shown in rank order); the background colour of each
-            # card is derived from its rank.
-            # NOTE(review): row['id'] is interpolated into HTML attributes and
-            # inline JavaScript without escaping - confirm ids cannot contain
-            # quotes or markup.
-            # NOTE(review): the <script> block below is delivered through
-            # gr.HTML; verify that the browser actually executes it and that
-            # the hidden radio inputs exist in the DOM, otherwise the
-            # "View Trend" buttons do nothing.
-            html_content = """
-            <div style='display: flex; flex-wrap: wrap; gap: 16px; justify-content: center;'>
-            """ + "".join([
-                f"""
-                <div class="space-card"
-                     data-space-id="{row['id']}"
-                     style="
-                    border: 1px solid #e5e7eb;
-                    border-radius: 8px;
-                    padding: 16px;
-                    margin: 8px;
-                    background-color: hsl(210, {max(30, 90 - (row['rank'] / 1000 * 60))}%, {min(97, 85 + (row['rank'] / 1000 * 10))}%);
-                    box-shadow: 0 1px 3px rgba(0,0,0,0.1);
-                    display: inline-block;
-                    width: 250px;
-                    vertical-align: top;
-                    cursor: pointer;
-                    transition: all 0.2s;
-                "
-                onmouseover="this.style.transform='translateY(-2px)';this.style.boxShadow='0 4px 6px rgba(0,0,0,0.1)';"
-                onmouseout="this.style.transform='none';this.style.boxShadow='0 1px 3px rgba(0,0,0,0.1)';"
-                >
-                    <div style="font-size: 1.2em; font-weight: bold; margin-bottom: 8px;">
-                        #{int(row['rank'])}
-                    </div>
-                    <div style="margin-bottom: 8px;">
-                        {row['id']}
-                    </div>
-                    <div style="color: #666; margin-bottom: 12px;">
-                        Score: {row['trendingScore']:.2f}
-                    </div>
-                    <div style="display: flex; gap: 8px;">
-                        <a href="https://huggingface.co/spaces/{row['id']}"
-                           target="_blank"
-                           style="padding: 6px 12px; background-color: white; color: #2563eb; text-decoration: none; border-radius: 4px; font-size: 0.9em; border: 1px solid #2563eb;"
-                           onclick="event.stopPropagation();">
-                            View Space ↗
-                        </a>
-                        <button onclick="event.preventDefault(); gradioEvent('{row['id']}');"
-                                style="padding: 6px 12px; background-color: #2563eb; color: white; border: none; border-radius: 4px; cursor: pointer; font-size: 0.9em;">
-                            View Trend
-                        </button>
-                    </div>
-                </div>
-                """
-                for _, row in top_n_spaces.iterrows()
-            ]) + """
-            </div>
-            <script>
-            function gradioEvent(spaceId) {
-                const radio = document.querySelector(`input[type="radio"][value="${spaceId}"]`);
-                if (radio) {
-                    radio.checked = true;
-                    const event = new Event('change');
-                    radio.dispatchEvent(event);
-                }
-            }
-            </script>
-            """
-            with gr.Row():
-                space_grid = gr.HTML(value=html_content)
-        with gr.Tab("About"):
-            gr.Markdown("""
-            ### Why might the chart be empty?
-            - 이 데모는 Top 1000 안에서 **동일한 `id`가 2번 이상** 등장하는 경우에만 점수를 합산해 막대차트를 그립니다.
-            - 데이터셋 상 실제로 중복된 `id`가 많지 않다면 차트가 비어있을 수 있습니다.
-            ### What can you do?
-            - (A) 코드를 수정해 Top 100 → 1000, 5000 등으로 늘려보거나,
-            - (B) 아예 rank 제한 없이 전체 데이터에서 중복 여부를 확인할 수도 있습니다.
-            - (C) 테스트용으로 가짜 중복 데이터를 만들어도 됩니다.
-            """)
-    # Selecting a radio option refreshes the trend chart and the details card
-    space_selection.change(
-        fn=update_display,
-        inputs=[space_selection],
-        outputs=[trend_plot, info_box]
-    )
-if __name__ == "__main__":
-    # share=True asks Gradio to also create a public share link
-    demo.launch(share=True)