jisujang commited on
Commit
a005c19
·
1 Parent(s): a454ab2
.gitignore ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ #uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ #poetry.lock
109
+
110
+ # pdm
111
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
112
+ #pdm.lock
113
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
114
+ # in version control.
115
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
116
+ .pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
121
+ __pypackages__/
122
+
123
+ # Celery stuff
124
+ celerybeat-schedule
125
+ celerybeat.pid
126
+
127
+ # SageMath parsed files
128
+ *.sage.py
129
+
130
+ # Environments
131
+ .env
132
+ .venv
133
+ env/
134
+ venv/
135
+ ENV/
136
+ env.bak/
137
+ venv.bak/
138
+
139
+ # Spyder project settings
140
+ .spyderproject
141
+ .spyproject
142
+
143
+ # Rope project settings
144
+ .ropeproject
145
+
146
+ # mkdocs documentation
147
+ /site
148
+
149
+ # mypy
150
+ .mypy_cache/
151
+ .dmypy.json
152
+ dmypy.json
153
+
154
+ # Pyre type checker
155
+ .pyre/
156
+
157
+ # pytype static type analyzer
158
+ .pytype/
159
+
160
+ # Cython debug symbols
161
+ cython_debug/
162
+
163
+ # PyCharm
164
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
165
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
166
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
167
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
168
+ #.idea/
169
+
170
+ # PyPI configuration file
171
+ .pypirc
172
+
173
+ # *.json
174
+
175
+ assets
176
+ DevMACS-AI-solution-devmacs
177
+ Research-AI-research-t2v_f1score_evaluator
178
+ .env
179
+ enviroments/abnormal-situation-leaderboard-3ca42d06719e.json
180
+ leaderboard_test
181
+ enviroments/deep-byte-352904-a072fdf439e7.json
app.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
from pathlib import Path
from leaderboard_ui.tab.submit_tab import submit_tab
from leaderboard_ui.tab.leaderboard_tab import leaderboard_tab
from leaderboard_ui.tab.dataset_visual_tab import visual_tab
from leaderboard_ui.tab.metric_visaul_tab import metric_visual_tab

# Directory containing this file (kept for resolving relative assets).
abs_path = Path(__file__).parent

# Top-level Gradio app: a single Blocks page with four tabs, each tab
# rendered by its own module under leaderboard_ui/tab/.
with gr.Blocks() as demo:
    gr.Markdown("""
    # 🥇 PIA_leaderboard
    """)
    with gr.Tabs():
        leaderboard_tab()
        submit_tab()
        visual_tab()
        metric_visual_tab()

if __name__ == "__main__":
    demo.launch()
enviroments/.gitkeep ADDED
File without changes
enviroments/config.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import pandas as pd

# Directory names skipped when scanning benchmark folders.
EXCLUDE_DIRS = {"@eaDir", 'temp'}

# gradio datatype for each leaderboard column, in column order.
TYPES = [
    "markdown",
    "markdown",
    "number",
    "number",
    "number",
    "number",
    "number",
    "number",
    "number",
    "str",
    "str",
    "str",
    "str",
    "bool",
    "str",
    "number",
    "number",
    "bool",
    "str",
    "bool",
    "bool",
    "str",
]

# Columns selected by default when the leaderboard first loads.
ON_LOAD_COLUMNS = [
    "TASK",
    "Model",
    "PIA"  # model name
]

# Columns the user cannot deselect.
OFF_LOAD_COLUMNS = ["Model link", "PIA", "PIA * 100" , "Model name" ]

# Columns never rendered in the table.
HIDE_COLUMNS = ["PIA * 100"]

# Columns exposed as filters.
FILTER_COLUMNS = ["T"]

# Columns treated as numeric for range filtering.
NUMERIC_COLUMNS = ["PIA"]

# Right-closed buckets for numeric range filtering.
# NOTE(review): the labels look like model-size buckets (billions of
# parameters?) — confirm the intended units.
NUMERIC_INTERVALS = {
    "?": pd.Interval(-1, 0, closed="right"),
    "~1.5": pd.Interval(0, 2, closed="right"),
    "~3": pd.Interval(2, 4, closed="right"),
    "~7": pd.Interval(4, 9, closed="right"),
    "~13": pd.Interval(9, 20, closed="right"),
    "~35": pd.Interval(20, 45, closed="right"),
    "~60": pd.Interval(45, 70, closed="right"),
    "70+": pd.Interval(70, 10000, closed="right"),
}
enviroments/convert.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ from dotenv import load_dotenv
4
+ load_dotenv()
5
+
6
def get_json_from_env_var(env_var_name):
    """Read a JSON document stored in an environment variable.

    Intended for secrets such as Google service-account keys whose
    ``private_key`` value contains literal newline characters when the
    JSON is pasted into a ``.env`` file.

    :param env_var_name: name of the environment variable to read
    :return: parsed JSON as a dict
    :raises EnvironmentError: if the variable is unset or empty
    :raises ValueError: if the value cannot be parsed as JSON
    """
    json_string = os.getenv(env_var_name)
    if not json_string:
        raise EnvironmentError(f"환경 변수 '{env_var_name}'가 설정되지 않았습니다.")

    # Escape raw newlines so multi-line values (e.g. PEM keys) survive
    # json.loads: a literal newline inside a string value becomes the
    # two-character escape sequence "\n".
    json_string = json_string.replace("\n", "\\n")

    try:
        json_data = json.loads(json_string)
    except json.JSONDecodeError as e:
        # Chain the decode error so the original position info survives.
        raise ValueError(f"JSON 변환 실패: {e}") from e

    return json_data
26
+
27
+
28
+
29
def json_to_env_var(json_file_path, env_var_name="JSON_ENV_VAR"):
    """Print the contents of a JSON file as a KEY=value line for a .env file.

    :param json_file_path: path to the JSON file to convert
    :param env_var_name: variable name to emit (default: JSON_ENV_VAR)
    :return: None
    """
    try:
        with open(json_file_path, 'r') as json_file:
            payload = json.load(json_file)
    except FileNotFoundError:
        print(f"파일을 찾을 수 없습니다: {json_file_path}")
        return
    except json.JSONDecodeError:
        print(f"유효한 JSON 파일이 아닙니다: {json_file_path}")
        return

    # Serialize compactly and present it ready to paste into .env.
    serialized = json.dumps(payload)
    print("\n환경 변수로 사용할 수 있는 출력값:\n")
    print(f'{env_var_name}={serialized}')
    print("\n위 값을 .env 파일에 복사하여 붙여넣으세요.")
54
+
leaderboard_ui/tab/dataset_visual_tab.py ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from pathlib import Path
3
+ from leaderboard_ui.tab.submit_tab import submit_tab
4
+ from leaderboard_ui.tab.leaderboard_tab import leaderboard_tab
5
+ abs_path = Path(__file__).parent
6
+ import plotly.express as px
7
+ import plotly.graph_objects as go
8
+ import pandas as pd
9
+ import numpy as np
10
+ from utils.bench_meta import process_videos_in_directory
11
+ # Mock 데이터 생성
12
def create_mock_data():
    """Build a random mock dataset of per-video metadata.

    Returns one row per fake video, spread across three benchmarks and
    five content categories; row counts per benchmark are random (50-99).
    """
    benchmark_names = ['VQA-2023', 'ImageQuality-2024', 'VideoEnhance-2024']
    category_names = ['Animation', 'Game', 'Movie', 'Sports', 'Vlog']

    rows = []
    for bench in benchmark_names:
        for _ in range(np.random.randint(50, 100)):
            chosen_category = np.random.choice(category_names)
            rows.append({
                "video_name": f"video_{np.random.randint(1000, 9999)}.mp4",
                "resolution": np.random.choice(["1920x1080", "3840x2160", "1280x720"]),
                "video_duration": f"{np.random.randint(0, 10)}:{np.random.randint(0, 60)}",
                "category": chosen_category,
                "benchmark": bench,
                "duration_seconds": np.random.randint(30, 600),
                "total_frames": np.random.randint(1000, 10000),
                "file_format": ".mp4",
                "file_size_mb": round(np.random.uniform(10, 1000), 2),
                "aspect_ratio": 16 / 9,
                "fps": np.random.choice([24, 30, 60]),
            })

    return pd.DataFrame(rows)
38
+
39
# Mock data generation / benchmark metadata load (runs at import time).
# df = process_videos_in_directory("/home/piawsa6000/nas192/videos/huggingface_benchmarks_dataset/Leaderboard_bench")
df = pd.read_csv("sample.csv")
# NOTE(review): these debug prints execute on every import of this module;
# consider removing them or switching to the logging module.
print("DataFrame shape:", df.shape)
print("DataFrame columns:", df.columns)
print("DataFrame head:\n", df.head())
45
def create_category_pie_chart(df, selected_benchmark, selected_categories=None):
    """Build a donut chart of video counts per category for one benchmark.

    :param df: video-metadata DataFrame with 'benchmark' and 'category' columns
    :param selected_benchmark: benchmark name to filter on
    :param selected_categories: optional category subset; falsy means all
    :return: plotly Figure
    """
    subset = df[df['benchmark'] == selected_benchmark]
    if selected_categories:
        subset = subset[subset['category'].isin(selected_categories)]

    counts = subset['category'].value_counts()

    fig = px.pie(
        values=counts.values,
        names=counts.index,
        title=f'{selected_benchmark} - Video Distribution by Category',
        hole=0.3,
    )
    fig.update_traces(textposition='inside', textinfo='percent+label')
    return fig
62
+
63
###TODO: decide how to handle the case where the selected column contains string values
64
+
65
def create_bar_chart(df, selected_benchmark, selected_categories, selected_column):
    """Build a horizontal bar chart of `selected_column` for each video.

    Rows are filtered to one benchmark and, optionally, a category subset;
    bars are colored by category.
    """
    subset = df[df['benchmark'] == selected_benchmark]
    if selected_categories:
        subset = subset[subset['category'].isin(selected_categories)]

    fig = px.bar(
        subset,
        x=selected_column,
        y='video_name',
        color='category',
        title=f'{selected_benchmark} - Video {selected_column}',
        orientation='h',
        color_discrete_sequence=px.colors.qualitative.Set3,
    )

    # Scale the figure with the row count so labels stay legible, sort
    # bars by value, leave room for long video names, and park a
    # horizontal legend above the plot area.
    fig.update_layout(
        height=max(400, len(subset) * 30),
        yaxis={'categoryorder': 'total ascending'},
        margin=dict(l=200),
        showlegend=True,
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1,
        ),
    )

    return fig
98
+
99
def submit_tab():
    # NOTE(review): this stub shadows the submit_tab imported from
    # leaderboard_ui.tab.submit_tab at the top of this module — anything
    # importing submit_tab from *this* module gets this minimal version.
    with gr.Tab("🚀 Submit here! "):
        with gr.Row():
            gr.Markdown("# ✉️✨ Submit your Result here!")
103
+
104
def visual_tab():
    """Render the "Bench Info" tab.

    Shows a per-benchmark category pie chart and a per-video bar chart of
    a user-chosen metadata column.  Relies on the module-level ``df``
    loaded from sample.csv at import time.
    """
    with gr.Tab("📊 Bench Info"):
        with gr.Row():
            benchmark_dropdown = gr.Dropdown(
                choices=sorted(df['benchmark'].unique().tolist()),
                value=sorted(df['benchmark'].unique().tolist())[0],
                label="Select Benchmark",
                interactive=True
            )

            category_multiselect = gr.CheckboxGroup(
                choices=sorted(df['category'].unique().tolist()),
                label="Select Categories (empty for all)",
                interactive=True
            )

        # Pie chart output.
        pie_plot_output = gr.Plot(label="pie")

        # Metadata columns the user may compare in the bar chart.
        column_options = [
            "video_duration", "duration_seconds", "total_frames",
            "file_size_mb", "aspect_ratio", "fps", "file_format"
        ]

        column_dropdown = gr.Dropdown(
            choices=column_options,
            value=column_options[0],
            label="Select Data to Compare",
            interactive=True
        )

        # Bar chart output.
        bar_plot_output = gr.Plot(label="video")

        def update_plots(benchmark, categories, selected_column):
            # Rebuild both plots from the current control values.
            pie_chart = create_category_pie_chart(df, benchmark, categories)
            bar_chart = create_bar_chart(df, benchmark, categories, selected_column)
            return pie_chart, bar_chart

        # Any control change refreshes both plots.
        benchmark_dropdown.change(
            fn=update_plots,
            inputs=[benchmark_dropdown, category_multiselect, column_dropdown],
            outputs=[pie_plot_output, bar_plot_output]
        )
        category_multiselect.change(
            fn=update_plots,
            inputs=[benchmark_dropdown, category_multiselect, column_dropdown],
            outputs=[pie_plot_output, bar_plot_output]
        )
        column_dropdown.change(
            fn=update_plots,
            inputs=[benchmark_dropdown, category_multiselect, column_dropdown],
            outputs=[pie_plot_output, bar_plot_output]
        )
160
+
leaderboard_ui/tab/leaderboard_tab.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from gradio_leaderboard import Leaderboard, SelectColumns, ColumnFilter,SearchColumns
3
+ import enviroments.config as config
4
+ from sheet_manager.sheet_loader.sheet2df import sheet2df
5
+
6
def leaderboard_tab():
    """Render the "Leaderboard" tab backed by the Google-Sheet data.

    Builds a gradio_leaderboard.Leaderboard from sheet2df() with column
    selection, search, hidden columns, and filters from enviroments.config,
    plus a manual refresh button that re-reads the sheet.
    """
    with gr.Tab("🏆Leaderboard"):
        leaderboard = Leaderboard(
            value=sheet2df(),
            select_columns=SelectColumns(
                default_selection=config.ON_LOAD_COLUMNS,
                cant_deselect=config.OFF_LOAD_COLUMNS,
                label="Select Columns to Display:",
                info="Check"
            ),

            search_columns=SearchColumns(
                primary_column="Model name",
                secondary_columns=["TASK"],
                placeholder="Search",
                label="Search"
            ),
            hide_columns=config.HIDE_COLUMNS,
            filter_columns=[
                ColumnFilter(
                    column= "TASK",
                ),
                ColumnFilter(
                    column="PIA * 100",
                    type="slider",
                    min=0,  # observed data minimum is ~77
                    max=100,  # observed data maximum is ~92
                    # default=[min_val, max_val],
                    default = [77 ,92],
                    label="PIA"  # displayed as 100x the raw PIA value
                )
            ],

            datatype=config.TYPES,
            # column_widths=["33%", "10%"],
        )
        refresh_button = gr.Button("🔄 Refresh Leaderboard")

        def refresh_leaderboard():
            # Re-read the sheet so the table reflects the latest rows.
            return sheet2df()

        refresh_button.click(
            refresh_leaderboard,
            inputs=[],
            outputs=leaderboard,
        )
+ )
52
+
leaderboard_ui/tab/metric_visaul_tab.py ADDED
@@ -0,0 +1,418 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from pathlib import Path
3
+ abs_path = Path(__file__).parent
4
+ import plotly.express as px
5
+ import plotly.graph_objects as go
6
+ import pandas as pd
7
+ import numpy as np
8
+ from sheet_manager.sheet_loader.sheet2df import sheet2df
9
+ from sheet_manager.sheet_convert.json2sheet import str2json
10
+ # Mock 데이터 생성
11
def calculate_avg_metrics(df):
    """
    Compute each model's metric values averaged over the three event
    categories (falldown / violence / fire).

    :param df: sheet DataFrame with 'Model name' and a JSON-string 'PIA' column
    :return: DataFrame with one row per usable model ('model_name' plus one
        column per averaged metric); rows with unusable PIA data are skipped
        with a printed diagnostic
    """
    metrics_data = []

    for _, row in df.iterrows():
        model_name = row['Model name']

        # Skip rows whose PIA cell is empty or not a JSON string.
        if pd.isna(row['PIA']) or not isinstance(row['PIA'], str):
            print(f"Skipping model {model_name}: Invalid PIA data")
            continue

        try:
            metrics = str2json(row['PIA'])

            # Skip when the decoded value is missing or not a dict.
            if not metrics or not isinstance(metrics, dict):
                print(f"Skipping model {model_name}: Invalid JSON format")
                continue

            # All three event categories must be present.
            required_categories = ['falldown', 'violence', 'fire']
            if not all(cat in metrics for cat in required_categories):
                print(f"Skipping model {model_name}: Missing required categories")
                continue

            # Metrics to average across the categories.
            required_metrics = ['accuracy', 'precision', 'recall', 'specificity', 'f1',
                                'balanced_accuracy', 'g_mean', 'mcc', 'npv', 'far']

            avg_metrics = {}
            for metric in required_metrics:
                try:
                    values = [metrics[cat][metric] for cat in required_categories
                              if metric in metrics[cat]]
                    if values:  # average only when at least one value exists
                        avg_metrics[metric] = sum(values) / len(values)
                    else:
                        avg_metrics[metric] = 0  # fallback default
                except (KeyError, TypeError) as e:
                    print(f"Error calculating {metric} for {model_name}: {str(e)}")
                    avg_metrics[metric] = 0  # default when lookup fails

            metrics_data.append({
                'model_name': model_name,
                **avg_metrics
            })

        except Exception as e:
            # Best-effort per-row processing: log and move on.
            print(f"Error processing model {model_name}: {str(e)}")
            continue

    return pd.DataFrame(metrics_data)
66
+
67
def create_performance_chart(df, selected_metrics):
    """Build a grouped horizontal bar chart comparing models on the
    selected metrics.

    :param df: DataFrame with a 'model_name' column plus one numeric
        column per metric
    :param selected_metrics: metric column names to plot
    :return: plotly Figure
    """
    fig = go.Figure()

    # Widen the left margin with the longest model name (7 px/char,
    # capped at 500).  default=0 keeps an empty DataFrame from raising
    # "max() arg is an empty sequence".
    max_name_length = max((len(name) for name in df['model_name']), default=0)
    left_margin = min(max_name_length * 7, 500)

    for metric in selected_metrics:
        fig.add_trace(go.Bar(
            name=metric,
            y=df['model_name'],  # model names on the y axis
            x=df[metric],        # metric values on the x axis
            text=[f'{val:.3f}' for val in df[metric]],
            textposition='auto',
            orientation='h'      # horizontal bars
        ))

    fig.update_layout(
        title='Model Performance Comparison',
        yaxis_title='Model Name',
        xaxis_title='Performance',
        barmode='group',
        height=max(400, len(df) * 40),  # scale height with model count
        margin=dict(l=left_margin, r=50, t=50, b=50),  # dynamic left margin
        showlegend=True,
        legend=dict(
            orientation="h",  # horizontal legend above the plot
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1
        ),
        yaxis={'categoryorder': 'total ascending'}  # sort by score
    )

    # Shrink tick labels so long model names fit.
    fig.update_yaxes(tickfont=dict(size=10))

    return fig
109
def create_confusion_matrix(metrics_data, selected_category):
    """Build a 2x2 confusion-matrix heatmap for one category.

    :param metrics_data: dict keyed by category, each value holding
        'tp'/'tn'/'fp'/'fn' counts
    :param selected_category: category key to visualize
    :return: plotly Figure
    """
    # Confusion counts for the selected category.
    tp = metrics_data[selected_category]['tp']
    tn = metrics_data[selected_category]['tn']
    fp = metrics_data[selected_category]['fp']
    fn = metrics_data[selected_category]['fn']

    # Matrix layout: rows = actual, columns = predicted.
    z = [[tn, fp], [fn, tp]]
    x = ['Negative', 'Positive']
    y = ['Negative', 'Positive']

    # Heatmap with each cell's count rendered as text.
    fig = go.Figure(data=go.Heatmap(
        z=z,
        x=x,
        y=y,
        colorscale=[[0, '#f7fbff'], [1, '#08306b']],
        showscale=False,
        text=[[str(val) for val in row] for row in z],
        texttemplate="%{text}",
        textfont={"color": "black", "size": 16},  # pin the text color to black
    ))

    # Layout: centered title, square figure, white background.
    fig.update_layout(
        title={
            'text': f'Confusion Matrix - {selected_category}',
            'y':0.9,
            'x':0.5,
            'xanchor': 'center',
            'yanchor': 'top'
        },
        xaxis_title='Predicted',
        yaxis_title='Actual',
        width=600,   # enlarged width
        height=600,  # enlarged height
        margin=dict(l=80, r=80, t=100, b=80),  # margin tuning
        paper_bgcolor='white',
        plot_bgcolor='white',
        font=dict(size=14)  # overall font size
    )

    # Axis tick placement and font size.
    fig.update_xaxes(side="bottom", tickfont=dict(size=14))
    fig.update_yaxes(side="left", tickfont=dict(size=14))

    return fig
158
+
159
def get_metrics_for_model(df, model_name, benchmark_name):
    """Look up the decoded PIA metrics for one model/benchmark pair.

    Returns the metrics dict parsed from the row's 'PIA' cell, or None
    when no matching row exists.
    """
    mask = (df['Model name'] == model_name) & (df['Benchmark'] == benchmark_name)
    matches = df[mask]
    if matches.empty:
        return None
    return str2json(matches['PIA'].iloc[0])
166
+
167
def metric_visual_tab():
    """Render the performance-visualization tab (superseded version).

    NOTE(review): DEAD CODE — this function is redefined later in this
    module, so this earlier definition is shadowed and never called.
    """
    # Load sheet data and pre-compute per-model category averages.
    df = sheet2df(sheet_name="metric")
    avg_metrics_df = calculate_avg_metrics(df)

    # All metrics the user can toggle on.
    all_metrics = ['accuracy', 'precision', 'recall', 'specificity', 'f1',
                   'balanced_accuracy', 'g_mean', 'mcc', 'npv', 'far']

    with gr.Tab("📊 Performance Visualization"):
        with gr.Row():
            metrics_multiselect = gr.CheckboxGroup(
                choices=all_metrics,
                value=[],  # nothing selected initially
                label="Select Performance Metrics",
                interactive=True
            )

        # Comparison chart (empty until metrics are chosen).
        performance_plot = gr.Plot()

        def update_plot(selected_metrics):
            if not selected_metrics:  # nothing selected -> clear plot
                return None

            try:
                # Sort models by accuracy before plotting.
                sorted_df = avg_metrics_df.sort_values(by='accuracy', ascending=True)
                return create_performance_chart(sorted_df, selected_metrics)
            except Exception as e:
                print(f"Error in update_plot: {str(e)}")
                return None

        # Redraw whenever the metric selection changes.
        metrics_multiselect.change(
            fn=update_plot,
            inputs=[metrics_multiselect],
            outputs=[performance_plot]
        )
206
+
207
def create_category_metrics_chart(metrics_data, selected_metrics):
    """Bar-chart the chosen metrics for each of the three event categories.

    :param metrics_data: dict keyed by category ('falldown'/'violence'/'fire'),
        each value mapping metric name to score
    :param selected_metrics: metric names to plot
    :return: plotly Figure
    """
    categories = ['falldown', 'violence', 'fire']
    fig = go.Figure()

    for metric_name in selected_metrics:
        scores = [metrics_data[cat][metric_name] for cat in categories]
        fig.add_trace(go.Bar(
            name=metric_name,
            x=categories,
            y=scores,
            text=[f'{s:.3f}' for s in scores],
            textposition='auto',
        ))

    # Grouped bars with a horizontal legend above the plot area.
    fig.update_layout(
        title='Performance Metrics by Category',
        xaxis_title='Category',
        yaxis_title='Score',
        barmode='group',
        height=500,
        showlegend=True,
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1
        )
    )

    return fig
244
+
245
def metric_visual_tab():
    """Render the "Performance Visualization" tab (active version).

    Top half: grouped bar chart comparing models on user-selected averaged
    metrics.  Bottom half ("Detailed Model Analysis"): per-model confusion
    matrix and per-category metric bars driven by three dropdowns.

    NOTE(review): this definition shadows the earlier metric_visual_tab in
    this module; this is the version actually used.
    """
    # Load sheet data and pre-compute per-model category averages.
    df = sheet2df(sheet_name="metric")
    avg_metrics_df = calculate_avg_metrics(df)

    # All metrics the user can toggle on.
    all_metrics = ['accuracy', 'precision', 'recall', 'specificity', 'f1',
                   'balanced_accuracy', 'g_mean', 'mcc', 'npv', 'far']

    with gr.Tab("📊 Performance Visualization"):
        with gr.Row():
            metrics_multiselect = gr.CheckboxGroup(
                choices=all_metrics,
                value=[],  # nothing selected initially
                label="Select Performance Metrics",
                interactive=True
            )

        performance_plot = gr.Plot()

        def update_plot(selected_metrics):
            # Empty selection -> clear the plot.
            if not selected_metrics:
                return None
            try:
                # Sort models by accuracy before plotting.
                sorted_df = avg_metrics_df.sort_values(by='accuracy', ascending=True)
                return create_performance_chart(sorted_df, selected_metrics)
            except Exception as e:
                print(f"Error in update_plot: {str(e)}")
                return None

        metrics_multiselect.change(
            fn=update_plot,
            inputs=[metrics_multiselect],
            outputs=[performance_plot]
        )

        # Second visualization section.
        gr.Markdown("## Detailed Model Analysis")

        with gr.Row():
            # Model picker.
            model_dropdown = gr.Dropdown(
                choices=sorted(df['Model name'].unique().tolist()),
                label="Select Model",
                interactive=True
            )

            # Metric-column picker (every column except the model name).
            column_dropdown = gr.Dropdown(
                choices=[col for col in df.columns if col != 'Model name'],
                label="Select Metric Column",
                interactive=True
            )

            # Category picker.
            category_dropdown = gr.Dropdown(
                choices=['falldown', 'violence', 'fire'],
                label="Select Category",
                interactive=True
            )

        # Confusion-matrix plot centered between empty spacer columns.
        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("")  # spacer
            with gr.Column(scale=2):
                confusion_matrix_plot = gr.Plot(container=True)
            with gr.Column(scale=1):
                gr.Markdown("")  # spacer

        with gr.Column(scale=2):
            # Metric toggles for the per-category bar chart.
            metrics_select = gr.CheckboxGroup(
                choices=['accuracy', 'precision', 'recall', 'specificity', 'f1',
                         'balanced_accuracy', 'g_mean', 'mcc', 'npv', 'far'],
                value=['accuracy'],  # default selection
                label="Select Metrics to Display",
                interactive=True
            )
            category_metrics_plot = gr.Plot()

        def update_visualizations(model, column, category, selected_metrics):
            # category is only required for the confusion matrix.
            if not all([model, column]):
                return None, None

            try:
                # Fetch and decode the selected model's metric JSON.
                selected_data = df[df['Model name'] == model][column].iloc[0]
                metrics = str2json(selected_data)

                if not metrics:
                    return None, None

                # Confusion matrix (left panel).
                confusion_fig = create_confusion_matrix(metrics, category) if category else None

                # Per-category metric bars (right panel).
                if not selected_metrics:
                    selected_metrics = ['accuracy']
                category_fig = create_category_metrics_chart(metrics, selected_metrics)

                return confusion_fig, category_fig

            except Exception as e:
                print(f"Error updating visualizations: {str(e)}")
                return None, None

        # Any control change refreshes both detailed plots.
        for input_component in [model_dropdown, column_dropdown, category_dropdown, metrics_select]:
            input_component.change(
                fn=update_visualizations,
                inputs=[model_dropdown, column_dropdown, category_dropdown, metrics_select],
                outputs=[confusion_matrix_plot, category_metrics_plot]
            )
359
+ # def update_confusion_matrix(model, column, category):
360
+ # if not all([model, column, category]):
361
+ # return None
362
+
363
+ # try:
364
+ # # 선택된 모델의 데이터 가져오기
365
+ # selected_data = df[df['Model name'] == model][column].iloc[0]
366
+ # metrics = str2json(selected_data)
367
+
368
+ # if metrics and category in metrics:
369
+ # category_data = metrics[category]
370
+
371
+ # # 혼동 행렬 데이터
372
+ # confusion_data = {
373
+ # 'tp': category_data['tp'],
374
+ # 'tn': category_data['tn'],
375
+ # 'fp': category_data['fp'],
376
+ # 'fn': category_data['fn']
377
+ # }
378
+
379
+ # # 히트맵 생성
380
+ # z = [[confusion_data['tn'], confusion_data['fp']],
381
+ # [confusion_data['fn'], confusion_data['tp']]]
382
+
383
+ # fig = go.Figure(data=go.Heatmap(
384
+ # z=z,
385
+ # x=['Negative', 'Positive'],
386
+ # y=['Negative', 'Positive'],
387
+ # text=[[str(val) for val in row] for row in z],
388
+ # texttemplate="%{text}",
389
+ # textfont={"size": 16},
390
+ # colorscale='Blues',
391
+ # showscale=False
392
+ # ))
393
+
394
+ # fig.update_layout(
395
+ # title=f'Confusion Matrix - {category}',
396
+ # xaxis_title='Predicted',
397
+ # yaxis_title='Actual',
398
+ # width=500,
399
+ # height=500
400
+ # )
401
+
402
+ # return fig
403
+
404
+ # except Exception as e:
405
+ # print(f"Error updating confusion matrix: {str(e)}")
406
+ # return None
407
+
408
+ # # 이벤트 핸들러 연결
409
+ # for dropdown in [model_dropdown, column_dropdown, category_dropdown]:
410
+ # dropdown.change(
411
+ # fn=update_confusion_matrix,
412
+ # inputs=[model_dropdown, column_dropdown, category_dropdown],
413
+ # outputs=confusion_matrix_plot
414
+ # )
415
+
416
+
417
+
418
+
leaderboard_ui/tab/submit_tab.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from sheet_manager.sheet_crud.sheet_crud import SheetManager
3
+ import pandas as pd
4
+
5
def list_to_dataframe(data):
    """Convert a flat list into a single-row DataFrame.

    Each element becomes its own column, headed "Queue 0", "Queue 1", …
    (The previous docstring claimed one row per value; the actual — and
    intended — layout is one row with one column per value, matching how
    the submit tab renders the pending-model queue.)

    :param data: list of values
    :return: pandas.DataFrame with one row and len(data) columns
    :raises ValueError: if data is not a list
    """
    if not isinstance(data, list):
        raise ValueError("입력 데이터는 리스트 형태여야 합니다.")

    # One "Queue i" header per element, all values on a single row.
    headers = [f"Queue {i}" for i in range(len(data))]
    df = pd.DataFrame([data], columns=headers)
    return df
20
+
21
def model_submit(model_id , benchmark_name, prompt_cfg_name):
    """Push a submission (model id, benchmark, prompt config) onto the
    evaluation-queue sheet and return the queue as a one-row DataFrame.
    """
    # Keep only the model-name part of an "org/model" huggingface id.
    model_id = model_id.split("/")[-1]
    sheet_manager = SheetManager()
    sheet_manager.push(model_id)
    # Snapshot the queue right after pushing the model id.
    # NOTE(review): this snapshot is taken BEFORE benchmark_name and
    # prompt_cfg_name are pushed below — confirm that ordering is intended.
    model_q = list_to_dataframe(sheet_manager.get_all_values())
    sheet_manager.change_column("benchmark_name")
    sheet_manager.push(benchmark_name)
    sheet_manager.change_column("prompt_cfg_name")
    sheet_manager.push(prompt_cfg_name)

    return model_q
32
+
33
def read_queue():
    """Return the current evaluation queue as a one-row DataFrame."""
    manager = SheetManager()
    queued_values = manager.get_all_values()
    return list_to_dataframe(queued_values)
36
+
37
def submit_tab():
    """Render the "Submit here!" tab.

    The "Model" sub-tab collects a huggingface id, benchmark name, and
    prompt config name, pushes them to the evaluation queue via
    model_submit/SheetManager, and displays the pending queue.  The
    "Prompt" sub-tab is a placeholder with unwired controls.
    """
    with gr.Tab("🚀 Submit here! "):
        with gr.Row():
            gr.Markdown("# ✉️✨ Submit your Result here!")

        with gr.Row():
            with gr.Tab("Model"):
                with gr.Row():
                    with gr.Column():
                        # Submission inputs.
                        model_id_textbox = gr.Textbox(
                            label="huggingface_id",
                            placeholder="PIA-SPACE-LAB/T2V_CLIP4Clip",
                            interactive = True
                        )
                        benchmark_name_textbox = gr.Textbox(
                            label="benchmark_name",
                            placeholder="PiaFSV",
                            interactive = True,
                            value="PIA"
                        )
                        prompt_cfg_name_textbox = gr.Textbox(
                            label="prompt_cfg_name",
                            placeholder="topk",
                            interactive = True,
                            value="topk"
                        )
                    with gr.Column():
                        gr.Markdown("## 평가를 받아보세요 반드시 허깅페이스에 업로드된 모델이어야 합니다.")
                        gr.Markdown("#### 현재 평가 대기중 모델입니다.")
                        # Pending-queue table plus manual refresh.
                        model_queue = gr.Dataframe()
                        refresh_button = gr.Button("refresh")
                        refresh_button.click(
                            fn=read_queue,
                            outputs=model_queue
                        )
                with gr.Row():
                    # Push the three fields to the sheet and show the queue.
                    model_submit_button = gr.Button("Submit Eval")
                    model_submit_button.click(
                        fn=model_submit,
                        inputs=[model_id_textbox,
                                benchmark_name_textbox ,
                                prompt_cfg_name_textbox],
                        outputs=model_queue
                    )
            with gr.Tab("Prompt"):
                with gr.Row():
                    with gr.Column():
                        # Placeholder controls — not wired to any handler yet.
                        prompt_cfg_selector = gr.Dropdown(
                            choices=["전부"],
                            label="Prompt_CFG",
                            multiselect=False,
                            value=None,
                            interactive=True,
                        )
                        weight_type = gr.Dropdown(
                            choices=["전부"],
                            label="Weights type",
                            multiselect=False,
                            value=None,
                            interactive=True,
                        )
                    with gr.Column():
                        gr.Markdown("## 평가를 받아보세요 반드시 허깅페이스에 업로드된 모델이어야 합니다.")

                with gr.Row():
                    # NOTE(review): this button has no click handler attached.
                    prompt_submit_button = gr.Button("Submit Eval")
103
+
main.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import sys
3
+ import os
4
+ from sheet_manager.sheet_crud.sheet_crud import SheetManager
5
+ from sheet_manager.sheet_monitor.sheet_sync import SheetMonitor, MainLoop
6
+ import time
7
+ from pia_bench.pipe_line.piepline import BenchmarkPipeline, PipelineConfig
8
+ from sheet_manager.sheet_convert.json2sheet import update_benchmark_json
9
+ import os
10
+ import shutil
11
+ import json
12
+
13
def calculate_total_accuracy(metrics: dict) -> float:
    """
    Calculate the average accuracy across all categories excluding 'micro_avg'.

    Args:
        metrics (dict): Mapping of category name -> metrics dict; each value
            may carry an 'accuracy' key.

    Returns:
        float: The mean of every per-category 'accuracy' value.

    Raises:
        ValueError: If no category contains an 'accuracy' value.
    """
    scores = [
        values["accuracy"]
        for category, values in metrics.items()
        if category != "micro_avg" and "accuracy" in values  # 'micro_avg' is an aggregate, not a category
    ]

    if not scores:
        raise ValueError("No accuracy values found in the provided metrics dictionary.")

    return sum(scores) / len(scores)
38
+
39
def my_custom_function(huggingface_id, benchmark_name, prompt_cfg_name):
    """Run the benchmark pipeline for one model and write scores back to the sheet.

    Args:
        huggingface_id: Full HF repo id, e.g. "PIA-SPACE-LAB/T2V_CLIP4Clip".
        benchmark_name: Benchmark name; also the sheet column that is updated.
        prompt_cfg_name: Name of the prompt-config JSON under the benchmark's CFG dir.
    """
    # The local model directory name is the repo id without the org prefix.
    model_name = huggingface_id.split("/")[-1]

    config = PipelineConfig(
        model_name=model_name,
        benchmark_name=benchmark_name,
        cfg_target_path=f"/mnt/nas_192tb/videos/huggingface_benchmarks_dataset/Leaderboard_bench/{benchmark_name}/CFG/{prompt_cfg_name}.json",
        base_path="/mnt/nas_192tb/videos/huggingface_benchmarks_dataset/Leaderboard_bench",
    )
    pipeline = BenchmarkPipeline(config)
    pipeline.run()

    result = pipeline.bench_result_dict
    value = calculate_total_accuracy(result)
    print("---" * 50)

    # Record the aggregate score on the "model" worksheet, in the row
    # whose "Model name" matches this model.
    sheet = SheetManager()
    sheet.change_worksheet("model")
    sheet.update_cell_by_condition(
        condition_column="Model name",
        condition_value=model_name,
        target_column=benchmark_name,
        target_value=value,
    )

    # Also persist the full per-category result blob.
    update_benchmark_json(
        model_name=model_name,
        benchmark_data=result,
        target_column=benchmark_name,  # target-column parameter
    )

    print(f"\n파이프라인 실행 결과:")
66
+
67
# Wire the sheet monitor to the benchmark callback and run until interrupted.
sheet_manager = SheetManager()
monitor = SheetMonitor(sheet_manager, check_interval=60.0)
main_loop = MainLoop(sheet_manager, monitor, callback_function=my_custom_function)

try:
    main_loop.start()
    while True:
        time.sleep(5)  # keep the main thread alive; workers run in the background
except KeyboardInterrupt:
    main_loop.stop()
pia_bench/bench.py ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+ from devmacs_core.devmacs_core import DevMACSCore
4
+ import json
5
+ from typing import Dict, List, Tuple
6
+ from pathlib import Path
7
+ import pandas as pd
8
+ from utils.except_dir import cust_listdir
9
def load_config(config_path: str) -> Dict:
    """Read a JSON config file and return its contents as a dict."""
    with open(config_path, 'r', encoding='utf-8') as fp:
        return json.load(fp)
13
+
14
# Directory-layout constants shared by the benchmark pipeline.
DATA_SET = "dataset"              # raw videos + label JSON/CSV files
CFG = "CFG"                       # prompt-config JSON directory
VECTOR = "vector"                 # extracted feature vectors
TEXT = "text"                     # text-vector subdirectory
VIDEO = "video"                   # video-vector subdirectory
EXECPT = ["@eaDir", "README.md"]  # entries to skip (name kept as-is for compatibility)
ALRAM = "alarm"                   # alarm outputs (name kept as-is for compatibility)
METRIC = "metric"                 # metric outputs
MSRVTT = "MSRVTT"                 # default model name
MODEL = "models"                  # per-model root directory
24
+
25
class PiaBenchMark:
    """Prepares a benchmark directory tree and frame-level labels for one model.

    Expected layout (rooted at ``benchmark_path``):
        dataset/<category>/*.mp4|*.json|*.csv
        CFG/<cfg>.json
        models/<model>/CFG/<cfg>/{alarm,metric}
        models/<model>/vector/{text,video}
    """

    def __init__(self, benchmark_path, cfg_target_path: str = None, model_name: str = MSRVTT, token: str = None):
        self.benchmark_path = benchmark_path
        self.token = token  # HF token used by extract_visual_vector
        self.model_name = model_name
        self.devmacs_core = None  # lazily created in extract_visual_vector
        self.cfg_target_path = cfg_target_path
        self.cfg_name = Path(cfg_target_path).stem
        self.cfg_dict = load_config(self.cfg_target_path)

        # Benchmark-level locations.
        self.dataset_path = os.path.join(benchmark_path, DATA_SET)
        self.cfg_path = os.path.join(benchmark_path, CFG)

        # Per-model locations: models/<name>/CFG/<cfg>/{alarm,metric}.
        self.model_path = os.path.join(self.benchmark_path, MODEL)
        self.model_name_path = os.path.join(self.model_path, self.model_name)
        self.model_name_cfg_path = os.path.join(self.model_name_path, CFG)
        self.model_name_cfg_name_path = os.path.join(self.model_name_cfg_path, self.cfg_name)
        self.alram_path = os.path.join(self.model_name_cfg_name_path, ALRAM)
        self.metric_path = os.path.join(self.model_name_cfg_name_path, METRIC)

        # Vector outputs: models/<name>/vector/{text,video}.
        self.vector_path = os.path.join(self.model_name_path, VECTOR)
        self.vector_text_path = os.path.join(self.vector_path, TEXT)
        self.vector_video_path = os.path.join(self.vector_path, VIDEO)

        # Filled by preprocess_structure / preprocess_label_to_csv.
        self.categories = []

    def _create_frame_labels(self, label_data: Dict, total_frames: int) -> pd.DataFrame:
        """Build a frame-indexed 0/1 label table from a clip-level label dict."""
        header = ['frame'] + sorted(self.categories)
        df = pd.DataFrame(0, index=range(total_frames), columns=header)
        df['frame'] = range(total_frames)

        for clip_info in label_data['clips'].values():
            clip_category = clip_info['category']
            # Only categories known to this benchmark are marked.
            if clip_category in self.categories:
                start_frame, end_frame = clip_info['timestamp']
                df.loc[start_frame:end_frame, clip_category] = 1  # inclusive span

        return df

    def preprocess_label_to_csv(self):
        """Convert every JSON label in the dataset to a frame-based CSV (idempotent)."""
        json_files = []
        csv_files = []

        # Populate categories only when preprocess_structure has not done so.
        if not self.categories:
            for cate in cust_listdir(self.dataset_path):
                if os.path.isdir(os.path.join(self.dataset_path, cate)):
                    self.categories.append(cate)

        for category in self.categories:
            category_path = os.path.join(self.dataset_path, category)
            entries = list(cust_listdir(category_path))
            json_files.extend(os.path.join(category, name) for name in entries if name.endswith('.json'))
            csv_files.extend(os.path.join(category, name) for name in entries if name.endswith('.csv'))

        if not json_files:
            raise ValueError("No JSON files found in any category directory")

        # One CSV per JSON means a previous run already finished.
        if len(json_files) == len(csv_files):
            print("All JSON files have already been processed to CSV. No further processing needed.")
            return

        for json_file in json_files:
            json_path = os.path.join(self.dataset_path, json_file)
            video_name = os.path.splitext(json_file)[0]  # keeps the category subdir

            label_info = load_config(json_path)
            total_frames = label_info['video_info']['total_frame']

            frame_df = self._create_frame_labels(label_info, total_frames)

            output_path = os.path.join(self.dataset_path, f"{video_name}.csv")
            frame_df.to_csv(output_path, index=False)
        print("Complete !")

    def preprocess_structure(self):
        """Create the expected directory tree and move raw category folders into dataset/."""
        os.makedirs(self.dataset_path, exist_ok=True)
        os.makedirs(self.cfg_path, exist_ok=True)
        os.makedirs(self.vector_text_path, exist_ok=True)
        os.makedirs(self.vector_video_path, exist_ok=True)
        os.makedirs(self.alram_path, exist_ok=True)
        os.makedirs(self.metric_path, exist_ok=True)
        os.makedirs(self.model_name_cfg_name_path, exist_ok=True)

        has_category_dirs = os.path.exists(self.dataset_path) and any(
            os.path.isdir(os.path.join(self.dataset_path, d)) for d in cust_listdir(self.dataset_path)
        )
        if has_category_dirs:
            # Already structured: read the categories back from dataset/.
            self.categories = [
                d for d in cust_listdir(self.dataset_path)
                if os.path.isdir(os.path.join(self.dataset_path, d))
            ]
        else:
            # First run: move each raw category folder under dataset/.
            reserved = [METRIC, "README.md", MODEL, CFG, DATA_SET, VECTOR, ALRAM]
            for item in cust_listdir(self.benchmark_path):
                item_path = os.path.join(self.benchmark_path, item)
                if item.startswith("@") or item in reserved or not os.path.isdir(item_path):
                    continue
                target_path = os.path.join(self.dataset_path, item)
                if not os.path.exists(target_path):
                    shutil.move(item_path, target_path)
                self.categories.append(item)

        # Mirror the category layout under the model's video-vector directory.
        for category in self.categories:
            os.makedirs(os.path.join(self.vector_video_path, category), exist_ok=True)

        print("Folder preprocessing completed.")

    def extract_visual_vector(self):
        """Download the model from HuggingFace and dump visual vectors for the dataset."""
        self.devmacs_core = DevMACSCore.from_huggingface(token=self.token, repo_id=f"PIA-SPACE-LAB/{self.model_name}")
        self.devmacs_core.save_visual_results(
            vid_dir=self.dataset_path,
            result_dir=self.vector_video_path,
        )
142
+
143
if __name__ == "__main__":
    # Manual smoke test: structure + label preprocessing on a local benchmark copy.
    from dotenv import load_dotenv
    import os
    load_dotenv()

    access_token = os.getenv("ACCESS_TOKEN")
    model_name = "T2V_CLIP4CLIP_MSRVTT"

    benchmark_path = "/home/jungseoik/data/Abnormal_situation_leader_board/assets/PIA"
    cfg_target_path = "/home/jungseoik/data/Abnormal_situation_leader_board/assets/PIA/CFG/topk.json"

    pia_benchmark = PiaBenchMark(benchmark_path, model_name=model_name, cfg_target_path=cfg_target_path, token=access_token)
    pia_benchmark.preprocess_structure()
    pia_benchmark.preprocess_label_to_csv()
    print("Categories identified:", pia_benchmark.categories)
pia_bench/checker/bench_checker.py ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import logging
3
+ from typing import List, Dict, Optional, Tuple
4
+ from pathlib import Path
5
+ import json
6
+ import numpy as np
7
+ logging.basicConfig(level=logging.INFO)
8
+
9
class BenchChecker:
    """Filesystem sanity checks for a benchmark/model/CFG layout under a base path."""

    def __init__(self, base_path: str):
        """Initialize BenchChecker with base assets path.

        Args:
            base_path (str): Base path to assets directory containing benchmark folders
        """
        self.base_path = Path(base_path)
        self.logger = logging.getLogger(__name__)

    def check_benchmark_exists(self, benchmark_name: str) -> bool:
        """Check if benchmark folder exists."""
        benchmark_dir = self.base_path / benchmark_name
        found = benchmark_dir.exists() and benchmark_dir.is_dir()
        if found:
            self.logger.info(f"Found benchmark directory: {benchmark_name}")
        else:
            self.logger.error(f"Benchmark directory not found: {benchmark_name}")
        return found

    def get_video_list(self, benchmark_name: str) -> List[str]:
        """Get list of videos from benchmark's dataset directory. Return empty list if no videos found."""
        dataset_dir = self.base_path / benchmark_name / "dataset"
        if not dataset_dir.exists():
            self.logger.info(f"Dataset directory exists but no videos found for {benchmark_name}")
            return []

        # Collect *.mp4 stems one category-directory deep.
        stems = [
            clip.stem
            for entry in dataset_dir.glob("*")
            if entry.is_dir()
            for clip in entry.glob("*.mp4")
        ]
        self.logger.info(f"Found {len(stems)} videos in {benchmark_name} dataset")
        return stems

    def check_model_exists(self, benchmark_name: str, model_name: str) -> bool:
        """Check if model directory exists in benchmark's models directory."""
        model_dir = self.base_path / benchmark_name / "models" / model_name
        found = model_dir.exists() and model_dir.is_dir()
        if found:
            self.logger.info(f"Found model directory: {model_name}")
        else:
            self.logger.error(f"Model directory not found: {model_name}")
        return found

    def check_cfg_files(self, benchmark_name: str, model_name: str, cfg_prompt: str) -> Tuple[bool, bool]:
        """Check if CFG files/directories exist in both benchmark and model directories."""
        # Benchmark-level CFG is a JSON file; model-level CFG is a directory.
        benchmark_cfg = self.base_path / benchmark_name / "CFG" / f"{cfg_prompt}.json"
        benchmark_cfg_exists = benchmark_cfg.exists() and benchmark_cfg.is_file()

        model_cfg = self.base_path / benchmark_name / "models" / model_name / "CFG" / cfg_prompt
        model_cfg_exists = model_cfg.exists() and model_cfg.is_dir()

        if benchmark_cfg_exists:
            self.logger.info(f"Found benchmark CFG file: {cfg_prompt}.json")
        else:
            self.logger.error(f"Benchmark CFG file not found: {cfg_prompt}.json")

        if model_cfg_exists:
            self.logger.info(f"Found model CFG directory: {cfg_prompt}")
        else:
            self.logger.error(f"Model CFG directory not found: {cfg_prompt}")

        return benchmark_cfg_exists, model_cfg_exists

    def check_vector_files(self, benchmark_name: str, model_name: str, video_list: List[str]) -> bool:
        """Check if video vectors match with dataset."""
        vector_dir = self.base_path / benchmark_name / "models" / model_name / "vector" / "video"

        # An empty dataset can never validate.
        if not video_list:
            self.logger.error("No videos found in dataset - cannot proceed")
            return False

        if not vector_dir.exists():
            self.logger.error("Vector directory doesn't exist")
            return False

        # Vectors may live in category subdirectories, so search recursively.
        vector_stems = [npy.stem for npy in vector_dir.rglob("*.npy")]

        missing = set(video_list) - set(vector_stems)
        extras = set(vector_stems) - set(video_list)

        if missing:
            self.logger.error(f"Missing vectors for videos: {missing}")
            return False
        if extras:
            self.logger.error(f"Extra vectors found: {extras}")
            return False

        self.logger.info(f"Vector status: videos={len(video_list)}, vectors={len(vector_stems)}")
        return len(video_list) == len(vector_stems)

    def check_metrics_file(self, benchmark_name: str, model_name: str, cfg_prompt: str) -> bool:
        """Check if overall_metrics.json exists in the model's CFG/metrics directory."""
        metrics_file = (self.base_path / benchmark_name / "models" / model_name
                        / "CFG" / cfg_prompt / "metric" / "overall_metrics.json")
        found = metrics_file.exists() and metrics_file.is_file()
        if found:
            self.logger.info(f"Found overall metrics file for {model_name}")
        else:
            self.logger.error(f"Overall metrics file not found for {model_name}")
        return found

    def check_benchmark(self, benchmark_name: str, model_name: str, cfg_prompt: str) -> Dict[str, bool]:
        """
        Perform all benchmark checks and return status.

        Checks short-circuit: missing benchmark/model/CFG stops further checks,
        and metrics are only checked when vectors match.
        """
        status = {
            'benchmark_exists': False,
            'model_exists': False,
            'cfg_files_exist': False,
            'vectors_match': False,
            'metrics_exist': False,
        }

        status['benchmark_exists'] = self.check_benchmark_exists(benchmark_name)
        if not status['benchmark_exists']:
            return status

        video_list = self.get_video_list(benchmark_name)

        status['model_exists'] = self.check_model_exists(benchmark_name, model_name)
        if not status['model_exists']:
            return status

        benchmark_cfg, model_cfg = self.check_cfg_files(benchmark_name, model_name, cfg_prompt)
        status['cfg_files_exist'] = benchmark_cfg and model_cfg
        if not status['cfg_files_exist']:
            return status

        status['vectors_match'] = self.check_vector_files(benchmark_name, model_name, video_list)

        if status['vectors_match']:
            status['metrics_exist'] = self.check_metrics_file(benchmark_name, model_name, cfg_prompt)

        return status

    def get_benchmark_status(self, check_status: Dict[str, bool]) -> str:
        """Determine which execution path to take based on check results."""
        prerequisites = ('benchmark_exists', 'model_exists', 'cfg_files_exist')
        if not all(check_status[name] for name in prerequisites):
            return "cannot_execute"
        if check_status['vectors_match'] and check_status['metrics_exist']:
            return "all_passed"
        if not check_status['vectors_match']:
            return "no_vectors"
        # Vectors exist but metrics were never produced.
        return "no_metrics"
171
+
172
# Example usage
if __name__ == "__main__":
    bench_checker = BenchChecker("assets")
    status = bench_checker.check_benchmark(
        benchmark_name="huggingface_benchmarks_dataset",
        model_name="MSRVTT",
        cfg_prompt="topk",
    )

    execution_path = bench_checker.get_benchmark_status(status)
    print(f"Checks completed. Execution path: {execution_path}")
    print(f"Status: {status}")
pia_bench/checker/sheet_checker.py ADDED
@@ -0,0 +1,284 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Dict, Optional, Set, Tuple
2
+ import logging
3
+ import gspread
4
+ from dotenv import load_dotenv
5
+ from typing import Optional, List
6
+ from sheet_manager.sheet_crud.sheet_crud import SheetManager
7
+
8
+ load_dotenv()
9
class SheetChecker:
    """Checks and updates model/benchmark entries on the 'model' worksheet."""

    def __init__(self, sheet_manager):
        """Initialize SheetChecker with a sheet manager instance."""
        self.sheet_manager = sheet_manager
        self.bench_sheet_manager = None
        self.logger = logging.getLogger(__name__)
        self._init_bench_sheet()

    def _init_bench_sheet(self):
        """Initialize sheet manager for the model sheet."""
        # Same manager class as the caller's, bound to the "model" worksheet.
        manager_cls = type(self.sheet_manager)
        self.bench_sheet_manager = manager_cls(
            spreadsheet_url=self.sheet_manager.spreadsheet_url,
            worksheet_name="model",
            column_name="Model name",
        )

    def add_benchmark_column(self, column_name: str):
        """Add a new benchmark column to the sheet (no-op when already present)."""
        try:
            headers = self.bench_sheet_manager.get_available_columns()
            if column_name in headers:
                return

            # Write the header into the first empty column of row 1.
            target_cell = gspread.utils.rowcol_to_a1(1, len(headers) + 1)
            # gspread expects a 2-D array of values.
            self.bench_sheet_manager.sheet.update(target_cell, [[column_name]])
            self.logger.info(f"Added new benchmark column: {column_name}")

            # Refresh cached headers; skip column validation during reconnect.
            self.bench_sheet_manager._connect_to_sheet(validate_column=False)
        except Exception as e:
            self.logger.error(f"Error adding benchmark column {column_name}: {str(e)}")
            raise

    def validate_benchmark_columns(self, benchmark_columns: List[str]) -> Tuple[List[str], List[str]]:
        """
        Validate benchmark columns and add missing ones.

        Args:
            benchmark_columns: List of benchmark column names to validate

        Returns:
            Tuple[List[str], List[str]]: (valid columns, invalid columns)
        """
        available = self.bench_sheet_manager.get_available_columns()
        valid_columns = []
        invalid_columns = []

        for col in benchmark_columns:
            if col in available:
                valid_columns.append(col)
                continue
            try:
                self.add_benchmark_column(col)
                valid_columns.append(col)
                self.logger.info(f"Added new benchmark column: {col}")
            except Exception as e:
                invalid_columns.append(col)
                self.logger.error(f"Failed to add benchmark column '{col}': {str(e)}")

        return valid_columns, invalid_columns

    def check_model_and_benchmarks(self, model_name: str, benchmark_columns: List[str]) -> Dict[str, List[str]]:
        """
        Check model existence and which benchmarks need to be filled.

        Args:
            model_name: Name of the model to check
            benchmark_columns: List of benchmark column names to check

        Returns:
            Dict with keys:
                'status': 'model_not_found' or 'model_exists'
                'empty_benchmarks': List of benchmark columns that need to be filled
                'filled_benchmarks': List of benchmark columns that are already filled
                'invalid_benchmarks': List of benchmark columns that don't exist
        """
        result = {
            'status': '',
            'empty_benchmarks': [],
            'filled_benchmarks': [],
            'invalid_benchmarks': [],
        }

        if not self.check_model_exists(model_name):
            result['status'] = 'model_not_found'
            return result
        result['status'] = 'model_exists'

        valid_columns, invalid_columns = self.validate_benchmark_columns(benchmark_columns)
        result['invalid_benchmarks'] = invalid_columns
        if not valid_columns:
            return result

        # Locate the model's row: +2 accounts for the header row and 1-based indexing.
        self.bench_sheet_manager.change_column("Model name")
        all_values = self.bench_sheet_manager.get_all_values()
        row_index = all_values.index(model_name) + 2

        for column in valid_columns:
            try:
                self.bench_sheet_manager.change_column(column)
                cell_value = self.bench_sheet_manager.sheet.cell(
                    row_index, self.bench_sheet_manager.col_index
                ).value
                if not cell_value or not cell_value.strip():
                    result['empty_benchmarks'].append(column)
                else:
                    result['filled_benchmarks'].append(column)
            except Exception as e:
                # Treat unreadable cells as empty so they get (re)filled.
                self.logger.error(f"Error checking column {column}: {str(e)}")
                result['empty_benchmarks'].append(column)

        return result

    def update_model_info(self, model_name: str, model_info: Dict[str, str]):
        """Update basic model information columns."""
        try:
            for column_name, value in model_info.items():
                self.bench_sheet_manager.change_column(column_name)
                self.bench_sheet_manager.push(value)
            self.logger.info(f"Successfully added new model: {model_name}")
        except Exception as e:
            self.logger.error(f"Error updating model info: {str(e)}")
            raise

    def update_benchmarks(self, model_name: str, benchmark_values: Dict[str, str]):
        """
        Update benchmark values.

        Args:
            model_name: Name of the model
            benchmark_values: Dictionary of benchmark column names and their values
        """
        try:
            self.bench_sheet_manager.change_column("Model name")
            all_values = self.bench_sheet_manager.get_all_values()
            row_index = all_values.index(model_name) + 2  # header row + 1-based index

            for column, value in benchmark_values.items():
                self.bench_sheet_manager.change_column(column)
                self.bench_sheet_manager.sheet.update_cell(row_index, self.bench_sheet_manager.col_index, value)
                self.logger.info(f"Updated benchmark {column} for model {model_name}")
        except Exception as e:
            self.logger.error(f"Error updating benchmarks: {str(e)}")
            raise

    def check_model_exists(self, model_name: str) -> bool:
        """Check if model exists in the sheet."""
        try:
            self.bench_sheet_manager.change_column("Model name")
            return model_name in self.bench_sheet_manager.get_all_values()
        except Exception as e:
            self.logger.error(f"Error checking model existence: {str(e)}")
            return False
173
+
174
+
175
def process_model_benchmarks(
    model_name: str,
    bench_checker: SheetChecker,
    model_info_func,
    benchmark_processor_func: callable,
    benchmark_columns: List[str],
    cfg_prompt: str
) -> None:
    """
    Process model benchmarks according to the specified workflow.

    Args:
        model_name: Name of the model to process
        bench_checker: SheetChecker instance
        model_info_func: Function that returns model info (name, link, etc.)
        benchmark_processor_func: Function that processes empty benchmarks and returns values
        benchmark_columns: List of benchmark columns to check
        cfg_prompt: Prompt configuration passed through to the processor
    """
    try:
        check_result = bench_checker.check_model_and_benchmarks(model_name, benchmark_columns)

        # Columns that could not be created are skipped, not fatal.
        if check_result['invalid_benchmarks']:
            bench_checker.logger.warning(
                f"Skipping invalid benchmark columns: {', '.join(check_result['invalid_benchmarks'])}"
            )

        # Register the model first when it is missing, then re-check.
        if check_result['status'] == 'model_not_found':
            model_info = model_info_func(model_name)
            bench_checker.update_model_info(model_name, model_info)
            bench_checker.logger.info(f"Added new model: {model_name}")
            check_result = bench_checker.check_model_and_benchmarks(model_name, benchmark_columns)

        if check_result['filled_benchmarks']:
            bench_checker.logger.info(
                f"Skipping filled benchmark columns: {', '.join(check_result['filled_benchmarks'])}"
            )

        if check_result['empty_benchmarks']:
            bench_checker.logger.info(
                f"Processing empty benchmark columns: {', '.join(check_result['empty_benchmarks'])}"
            )
            # Measure only the benchmarks that are still blank.
            benchmark_values = benchmark_processor_func(
                model_name,
                check_result['empty_benchmarks'],
                cfg_prompt
            )
            bench_checker.update_benchmarks(model_name, benchmark_values)
        else:
            bench_checker.logger.info("No empty benchmark columns to process")

    except Exception as e:
        bench_checker.logger.error(f"Error processing model {model_name}: {str(e)}")
        raise
+ raise
236
+
237
def get_model_info(model_name: str) -> Dict[str, str]:
    """Build the spreadsheet row fields (name, link, HTML anchor) for a model."""
    link = f"https://huggingface.co/PIA-SPACE-LAB/{model_name}"
    anchor = (
        f'<a target="_blank" href="{link}" style="color: var(--link-text-color); '
        f'text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
    )
    return {
        "Model name": model_name,
        "Model link": link,
        "Model": anchor,
    }
244
+
245
def process_benchmarks(
    model_name: str,
    empty_benchmarks: List[str],
    cfg_prompt: str
) -> Dict[str, str]:
    """
    Measure benchmark scores for given model with specific configuration.

    Args:
        model_name: Name of the model to evaluate
        empty_benchmarks: List of benchmarks to measure
        cfg_prompt: Prompt configuration for evaluation

    Returns:
        Dict[str, str]: Dictionary mapping benchmark names to their scores
    """
    # Placeholder scores until real measurement is wired in:
    # score = measure_benchmark(model_name, benchmark, cfg_prompt)
    placeholder_scores = {"COCO": 0.5, "ImageNet": 15.0}

    result = {}
    for benchmark in empty_benchmarks:
        # BUG FIX: the original left `score` unbound for any benchmark other
        # than "COCO"/"ImageNet", raising NameError; default to 0.0 instead.
        score = placeholder_scores.get(benchmark, 0.0)
        result[benchmark] = str(score)
    return result
271
# Example usage
if __name__ == "__main__":
    sheet_manager = SheetManager()
    bench_checker = SheetChecker(sheet_manager)

    process_model_benchmarks(
        "test-model",
        bench_checker,
        get_model_info,
        process_benchmarks,
        ["COCO", "ImageNet"],
        "cfg_prompt_value",
    )
pia_bench/event_alarm.py ADDED
@@ -0,0 +1,225 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import numpy as np
3
+ import torch
4
+ from typing import Dict, List, Tuple
5
+ from devmacs_core.devmacs_core import DevMACSCore
6
+ # from devmacs_core.devmacs_core_copy import DevMACSCore
7
+
8
+ from devmacs_core.utils.common.cal import loose_similarity
9
+ from utils.parser import load_config, PromptManager
10
+ import json
11
+ import pandas as pd
12
+ from tqdm import tqdm
13
+ import logging
14
+ from datetime import datetime
15
+ from utils.except_dir import cust_listdir
16
+
17
+ class EventDetector:
18
+ def __init__(self, config_path: str , model_name:str = None, token:str = None):
19
+ self.config = load_config(config_path)
20
+ self.macs = DevMACSCore.from_huggingface(token=token, repo_id=f"PIA-SPACE-LAB/{model_name}")
21
+ # self.macs = DevMACSCore(model_type="clip4clip_web")
22
+
23
+ self.prompt_manager = PromptManager(config_path)
24
+ self.sentences = self.prompt_manager.sentences
25
+ self.text_vectors = self.macs.get_text_vector(self.sentences)
26
+
27
+ def process_and_save_predictions(self, vector_base_dir: str, label_base_dir: str, save_base_dir: str):
28
+ """비디오 벡터를 처리하고 결과를 CSV로 저장"""
29
+
30
+ # 전체 비디오 파일 수 계산
31
+ total_videos = sum(len([f for f in cust_listdir(os.path.join(vector_base_dir, d))
32
+ if f.endswith('.npy')])
33
+ for d in cust_listdir(vector_base_dir)
34
+ if os.path.isdir(os.path.join(vector_base_dir, d)))
35
+ pbar = tqdm(total=total_videos, desc="Processing videos")
36
+
37
+ for category in cust_listdir(vector_base_dir):
38
+ category_path = os.path.join(vector_base_dir, category)
39
+ if not os.path.isdir(category_path):
40
+ continue
41
+
42
+ # 저장 디렉토리 생성
43
+ save_category_dir = os.path.join(save_base_dir, category)
44
+ os.makedirs(save_category_dir, exist_ok=True)
45
+
46
+ for file in cust_listdir(category_path):
47
+ if file.endswith('.npy'):
48
+ video_name = os.path.splitext(file)[0]
49
+ vector_path = os.path.join(category_path, file)
50
+
51
+ # 라벨 파일 읽기
52
+ label_path = os.path.join(label_base_dir, category, f"{video_name}.json")
53
+ with open(label_path, 'r') as f:
54
+ label_data = json.load(f)
55
+ total_frames = label_data['video_info']['total_frame']
56
+
57
+ # 예측 결과 생성 및 저장
58
+ self._process_and_save_single_video(
59
+ vector_path=vector_path,
60
+ total_frames=total_frames,
61
+ save_path=os.path.join(save_category_dir, f"{video_name}.csv")
62
+ )
63
+ pbar.update(1)
64
+ pbar.close()
65
+
66
+ def _process_and_save_single_video(self, vector_path: str, total_frames: int, save_path: str):
67
+ """단일 비디오 처리 및 저장"""
68
+ # 기본 예측 수행
69
+ sparse_predictions = self._process_single_vector(vector_path)
70
+
71
+ # 데이터프레임으로 변환 및 확장
72
+ df = self._expand_predictions(sparse_predictions, total_frames)
73
+
74
+ # CSV로 저장
75
+ df.to_csv(save_path, index=False)
76
+
77
+ def _process_single_vector(self, vector_path: str) -> Dict:
78
+ """기존 예측 로직"""
79
+ video_vector = np.load(vector_path)
80
+ processed_vectors = []
81
+ frame_interval = 15
82
+
83
+ for vector in video_vector:
84
+ v = vector.squeeze(0) # numpy array
85
+ v = torch.from_numpy(v).unsqueeze(0).cuda() # torch tensor로 변환 후 GPU로
86
+ processed_vectors.append(v)
87
+
88
+ frame_results = {}
89
+ for vector_idx, v in enumerate(processed_vectors):
90
+ actual_frame = vector_idx * frame_interval
91
+ sim_scores = loose_similarity(
92
+ sequence_output=self.text_vectors.cuda(),
93
+ visual_output=v.unsqueeze(1)
94
+ )
95
+ frame_results[actual_frame] = self._calculate_alarms(sim_scores)
96
+
97
+ return frame_results
98
+
99
+ def _expand_predictions(self, sparse_predictions: Dict, total_frames: int) -> pd.DataFrame:
100
+ """예측을 전체 프레임으로 확장"""
101
+ # 카테고리 목록 추출 (첫 번째 프레임의 알람 결과에서)
102
+ first_frame = list(sparse_predictions.keys())[0]
103
+ categories = list(sparse_predictions[first_frame].keys())
104
+
105
+ # 전체 프레임 생성
106
+ df = pd.DataFrame({'frame': range(total_frames)})
107
+
108
+ # 각 카테고리에 대한 알람 값 초기화
109
+ for category in categories:
110
+ df[category] = 0
111
+
112
+ # 예측값 채우기
113
+ frame_keys = sorted(sparse_predictions.keys())
114
+ for i in range(len(frame_keys)):
115
+ current_frame = frame_keys[i]
116
+ next_frame = frame_keys[i + 1] if i + 1 < len(frame_keys) else total_frames
117
+
118
+ # 각 카테고리의 알람 값 설정
119
+ for category in categories:
120
+ alarm_value = sparse_predictions[current_frame][category]['alarm']
121
+ df.loc[current_frame:next_frame-1, category] = alarm_value
122
+
123
+ return df
124
+
125
+
126
+ def _calculate_alarms(self, sim_scores: torch.Tensor) -> Dict:
127
+ """유사도 점수를 기반으로 각 이벤트의 알람 상태 계산"""
128
+ # 로거 설정
129
+ log_filename = f"alarm_calculation_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"
130
+ logging.basicConfig(
131
+ filename=log_filename,
132
+ level=logging.ERROR,
133
+ format='%(asctime)s - %(message)s',
134
+ datefmt='%Y-%m-%d %H:%M:%S'
135
+ )
136
+ logger = logging.getLogger(__name__)
137
+
138
+ event_alarms = {}
139
+
140
+ for event_config in self.config['PROMPT_CFG']:
141
+ event = event_config['event']
142
+ top_k = event_config['top_candidates']
143
+ threshold = event_config['alert_threshold']
144
+
145
+ # logger.info(f"\nProcessing event: {event}")
146
+ # logger.info(f"Top K: {top_k}, Threshold: {threshold}")
147
+
148
+ event_prompts = self._get_event_prompts(event)
149
+
150
+ # logger.debug(f"\nEvent Prompts Debug for {event}:")
151
+ # logger.debug(f"Indices: {event_prompts['indices']}")
152
+ # logger.debug(f"Types: {event_prompts['types']}")
153
+ # logger.debug(f"\nSim Scores Debug:")
154
+ # logger.debug(f"Shape: {sim_scores.shape}")
155
+ # logger.debug(f"Raw scores: {sim_scores}")
156
+
157
+ # event_scores = sim_scores[event_prompts['indices']]
158
+ event_scores = sim_scores[event_prompts['indices']].squeeze(-1) # shape 변경
159
+
160
+ # logger.debug(f"Event scores shape: {event_scores.shape}")
161
+ # logger.debug(f"Event scores: {event_scores}")
162
+ # 각 프롬프트와 점수 출력
163
+ # logger.info("\nDEBUG VALUES:")
164
+ # logger.info(f"event_scores: {event_scores}")
165
+ # logger.info(f"indices: {event_prompts['indices']}")
166
+ # logger.info(f"types: {event_prompts['types']}")
167
+
168
+ # logger.info("\nAll prompts and scores:")
169
+ # for idx, (score, prompt_type) in enumerate(zip(event_scores, event_prompts['types'])):
170
+ # logger.info(f"Type: {prompt_type}, Score: {score.item():.4f}")
171
+
172
+ top_k_values, top_k_indices = torch.topk(event_scores, min(top_k, len(event_scores)))
173
+
174
+ # logger.info(f"top_k_values: {top_k_values}")
175
+ # logger.info(f"top_k_indices (raw): {top_k_indices}")
176
+ # Top K 결과 출력
177
+ # logger.info(f"\nTop {top_k} selections:")
178
+ for idx, (value, index) in enumerate(zip(top_k_values, top_k_indices)):
179
+ # indices[index]가 아닌 index를 직접 사용
180
+ prompt_type = event_prompts['types'][index] # 수정된 부분
181
+ # logger.info(f"DEBUG: index={index}, types={event_prompts['types']}, selected_type={prompt_type}")
182
+ # logger.info(f"Rank {idx+1}: Type: {prompt_type}, Score: {value.item():.4f}")
183
+
184
+ abnormal_count = sum(1 for idx in top_k_indices
185
+ if event_prompts['types'][idx] == 'abnormal') # 수정된 부분
186
+ # for idx, (value, orig_idx) in enumerate(zip(top_k_values, top_k_indices)):
187
+ # prompt_type = event_prompts['types'][orig_idx.item()]
188
+ # logger.info(f"Rank {idx+1}: Type: {prompt_type}, Score: {value.item():.4f}")
189
+
190
+ # abnormal_count = sum(1 for idx in top_k_indices
191
+ # if event_prompts['types'][idx.item()] == 'abnormal')
192
+
193
+ # 알람 결정 과정 출력
194
+ # logger.info(f"\nAbnormal count: {abnormal_count}")
195
+ alarm_result = 1 if abnormal_count >= threshold else 0
196
+ # logger.info(f"Final alarm decision: {alarm_result}")
197
+ # logger.info("-" * 50)
198
+
199
+ event_alarms[event] = {
200
+ 'alarm': alarm_result,
201
+ 'scores': top_k_values.tolist(),
202
+ 'top_k_types': [event_prompts['types'][idx.item()] for idx in top_k_indices]
203
+ }
204
+
205
+ # 로거 종료
206
+ logging.shutdown()
207
+
208
+ return event_alarms
209
+
210
+ def _get_event_prompts(self, event: str) -> Dict:
211
+ indices = []
212
+ types = []
213
+ current_idx = 0
214
+
215
+ for event_config in self.config['PROMPT_CFG']:
216
+ if event_config['event'] == event:
217
+ for status in ['normal', 'abnormal']:
218
+ for _ in range(len(event_config['prompts'][status])):
219
+ indices.append(current_idx)
220
+ types.append(status)
221
+ current_idx += 1
222
+
223
+ return {'indices': indices, 'types': types}
224
+
225
+
pia_bench/metric.py ADDED
@@ -0,0 +1,322 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pandas as pd
3
+ import numpy as np
4
+ from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score
5
+ from typing import Dict, List
6
+ import json
7
+ from utils.except_dir import cust_listdir
8
+
9
class MetricsEvaluator:
    """Compare per-frame prediction CSVs against label CSVs and report metrics."""

    def __init__(self, pred_dir: str, label_dir: str, save_dir: str):
        """
        Args:
            pred_dir: directory containing the prediction CSV files
            label_dir: directory containing the ground-truth CSV files
            save_dir: directory where the evaluation results are written
        """
        self.pred_dir = pred_dir
        self.label_dir = label_dir
        self.save_dir = save_dir
20
+
21
+ def evaluate(self) -> Dict:
22
+ """전체 평가 수행"""
23
+ category_metrics = {} # 카테고리별 평균 성능 저장
24
+ all_metrics = { # 모든 카테고리 통합 메트릭
25
+ 'falldown': {'f1': [], 'accuracy': [], 'precision': [], 'recall': [], 'specificity': []},
26
+ 'violence': {'f1': [], 'accuracy': [], 'precision': [], 'recall': [], 'specificity': []},
27
+ 'fire': {'f1': [], 'accuracy': [], 'precision': [], 'recall': [], 'specificity': []}
28
+ }
29
+
30
+ # 모든 카테고리의 metrics를 저장할 DataFrame 리스트
31
+ all_categories_metrics = []
32
+
33
+ for category in cust_listdir(self.pred_dir):
34
+ if not os.path.isdir(os.path.join(self.pred_dir, category)):
35
+ continue
36
+
37
+ pred_category_path = os.path.join(self.pred_dir, category)
38
+ label_category_path = os.path.join(self.label_dir, category)
39
+ save_category_path = os.path.join(self.save_dir, category)
40
+ os.makedirs(save_category_path, exist_ok=True)
41
+
42
+ # 결과 저장을 위한 데이터프레임 생성
43
+ metrics_df = self._evaluate_category(category, pred_category_path, label_category_path)
44
+
45
+ metrics_df['category'] = category
46
+
47
+ metrics_df.to_csv(os.path.join(save_category_path, f"{category}_metrics.csv"), index=False)
48
+
49
+ all_categories_metrics.append(metrics_df)
50
+
51
+ # 카테고리별 평균 성능 저장
52
+ category_metrics[category] = metrics_df.iloc[-1].to_dict() # 마지막 row(평균)
53
+
54
+ # 전체 평균을 위한 메트릭 수집
55
+ # for col in metrics_df.columns:
56
+ # if col != 'video_name':
57
+ # event_type, metric_type = col.split('_')
58
+ # all_metrics[event_type][metric_type].append(category_metrics[category][col])
59
+
60
+ for col in metrics_df.columns:
61
+ if col != 'video_name':
62
+ try:
63
+ # 첫 번째 언더스코어를 기준으로 이벤트 타입과 메트릭 타입 분리
64
+ parts = col.split('_', 1) # maxsplit=1로 첫 번째 언더스코어에서만 분리
65
+ if len(parts) == 2:
66
+ event_type, metric_type = parts
67
+ if event_type in all_metrics and metric_type in all_metrics[event_type]:
68
+ all_metrics[event_type][metric_type].append(category_metrics[category][col])
69
+ except Exception as e:
70
+ print(f"Warning: Could not process column {col}: {str(e)}")
71
+ continue
72
+
73
+ # 각 DataFrame에서 마지막 행(average)을 제거
74
+ all_categories_metrics_without_avg = [df.iloc[:-1] for df in all_categories_metrics]
75
+ # 모든 카테고리의 metrics를 하나의 DataFrame으로 합치기
76
+ combined_metrics_df = pd.concat(all_categories_metrics_without_avg, ignore_index=True)
77
+ # 합쳐진 metrics를 json 파일과 같은 위치에 저장
78
+ combined_metrics_df.to_csv(os.path.join(self.save_dir, "all_categories_metrics.csv"), index=False)
79
+ # 결과 출력
80
+ # print("\nCategory-wise Average Metrics:")
81
+ # for category, metrics in category_metrics.items():
82
+ # print(f"\n{category}:")
83
+ # for metric_name, value in metrics.items():
84
+ # if metric_name != "video_name":
85
+ # print(f"{metric_name}: {value:.3f}")
86
+
87
+ print("\nCategory-wise Average Metrics:")
88
+ for category, metrics in category_metrics.items():
89
+ print(f"\n{category}:")
90
+ for metric_name, value in metrics.items():
91
+ if metric_name != "video_name":
92
+ try:
93
+ if isinstance(value, str):
94
+ print(f"{metric_name}: {value}")
95
+ elif metric_name in ['tp', 'tn', 'fp', 'fn']:
96
+ print(f"{metric_name}: {int(value)}")
97
+ else:
98
+ print(f"{metric_name}: {float(value):.3f}")
99
+ except (ValueError, TypeError):
100
+ print(f"{metric_name}: {value}")
101
+ # 전체 평균 계산 및 출력
102
+ print("\n" + "="*50)
103
+ print("Overall Average Metrics Across All Categories:")
104
+ print("="*50)
105
+
106
+ # for event_type in all_metrics:
107
+ # print(f"\n{event_type}:")
108
+ # for metric_type, values in all_metrics[event_type].items():
109
+ # avg_value = np.mean(values)
110
+ # print(f"{metric_type}: {avg_value:.3f}")
111
+
112
+ for event_type in all_metrics:
113
+ print(f"\n{event_type}:")
114
+ for metric_type, values in all_metrics[event_type].items():
115
+ avg_value = np.mean(values)
116
+ if metric_type in ['tp', 'tn', 'fp', 'fn']: # 정수 값
117
+ print(f"{metric_type}: {int(avg_value)}")
118
+ else: # 소수점 값
119
+ print(f"{metric_type}: {avg_value:.3f}")
120
+ ##################################################################################################
121
+ # 최종 결과를 저장할 딕셔너리
122
+ final_results = {
123
+ "category_metrics": {},
124
+ "overall_metrics": {}
125
+ }
126
+ # 카테고리별 메트릭 저장
127
+
128
+ for category, metrics in category_metrics.items():
129
+ final_results["category_metrics"][category] = {}
130
+ for metric_name, value in metrics.items():
131
+ if metric_name != "video_name":
132
+ if isinstance(value, (int, float)):
133
+ final_results["category_metrics"][category][metric_name] = float(value)
134
+
135
+ # 전체 평균 계산 및 저장
136
+ for event_type in all_metrics:
137
+ # print(f"\n{event_type}:")
138
+ final_results["overall_metrics"][event_type] = {}
139
+ for metric_type, values in all_metrics[event_type].items():
140
+ avg_value = float(np.mean(values))
141
+ # print(f"{metric_type}: {avg_value:.3f}")
142
+ final_results["overall_metrics"][event_type][metric_type] = avg_value
143
+
144
+ # JSON 파일로 저장
145
+ json_path = os.path.join(self.save_dir, "overall_metrics.json")
146
+ with open(json_path, 'w', encoding='utf-8') as f:
147
+ json.dump(final_results, f, indent=4)
148
+
149
+ # return category_metrics
150
+
151
+ # 누적 메트릭 계산
152
+ accumulated_metrics = self.calculate_accumulated_metrics(combined_metrics_df)
153
+
154
+ # JSON에 누적 메트릭 추가
155
+ final_results["accumulated_metrics"] = accumulated_metrics
156
+
157
+ # 누적 메트릭만 따로 저장
158
+ accumulated_json_path = os.path.join(self.save_dir, "accumulated_metrics.json")
159
+ with open(accumulated_json_path, 'w', encoding='utf-8') as f:
160
+ json.dump(accumulated_metrics, f, indent=4)
161
+
162
+ return accumulated_metrics
163
+
164
    def _evaluate_category(self, category: str, pred_path: str, label_path: str) -> pd.DataFrame:
        """Evaluate every video of one category.

        Args:
            category: category name (used only by the caller; not read here)
            pred_path: directory with this category's prediction CSVs
            label_path: directory with this category's ground-truth CSVs

        Returns:
            DataFrame with one row per video ('video_name' plus
            '<event>_<metric>' columns) and a final 'average' row.
        """
        results = []
        metrics_columns = ['video_name']

        for pred_file in cust_listdir(pred_path):
            if not pred_file.endswith('.csv'):
                continue

            video_name = os.path.splitext(pred_file)[0]
            pred_df = pd.read_csv(os.path.join(pred_path, pred_file))

            # Load the matching ground-truth CSV for this video.
            label_file = f"{video_name}.csv"
            label_path_full = os.path.join(label_path, label_file)

            if not os.path.exists(label_path_full):
                print(f"Warning: Label file not found for {video_name}")
                continue

            label_df = pd.read_csv(label_path_full)

            # Compute metrics for each event column (every column but 'frame').
            video_metrics = {'video_name': video_name}
            categories = [col for col in pred_df.columns if col != 'frame']

            for cat in categories:
                # Ground truth and prediction as 0/1 arrays.
                y_true = label_df[cat].values
                y_pred = pred_df[cat].values

                # Per-event metric computation.
                metrics = self._calculate_metrics(y_true, y_pred)

                # Store results; grow the column list on first sight.
                for metric_name, value in metrics.items():
                    col_name = f"{cat}_{metric_name}"
                    video_metrics[col_name] = value
                    if col_name not in metrics_columns:
                        metrics_columns.append(col_name)

            results.append(video_metrics)

        # Convert the per-video results to a DataFrame.
        metrics_df = pd.DataFrame(results, columns=metrics_columns)

        # Append the per-category average row.
        avg_metrics = {'video_name': 'average'}
        for col in metrics_columns[1:]:  # every column except video_name
            avg_metrics[col] = metrics_df[col].mean()

        metrics_df = pd.concat([metrics_df, pd.DataFrame([avg_metrics])], ignore_index=True)

        return metrics_df
218
+
219
+ # def _calculate_metrics(self, y_true: np.ndarray, y_pred: np.ndarray) -> Dict:
220
+ # """성능 지표 계산"""
221
+ # tn = np.sum((y_true == 0) & (y_pred == 0))
222
+ # fp = np.sum((y_true == 0) & (y_pred == 1))
223
+
224
+ # metrics = {
225
+ # 'f1': f1_score(y_true, y_pred, zero_division=0),
226
+ # 'accuracy': accuracy_score(y_true, y_pred),
227
+ # 'precision': precision_score(y_true, y_pred, zero_division=0),
228
+ # 'recall': recall_score(y_true, y_pred, zero_division=0),
229
+ # 'specificity': tn / (tn + fp) if (tn + fp) > 0 else 0
230
+ # }
231
+
232
+ # return metrics
233
+
234
+
235
+ def calculate_accumulated_metrics(self, all_categories_metrics_df: pd.DataFrame) -> Dict:
236
+ """누적된 혼동행렬로 각 카테고리별 성능 지표 계산"""
237
+ accumulated_results = {"micro_avg": {}}
238
+ categories = ['falldown', 'violence', 'fire']
239
+
240
+ for category in categories:
241
+ # 해당 카테고리의 혼동행렬 값들 누적
242
+ tp = all_categories_metrics_df[f'{category}_tp'].sum()
243
+ tn = all_categories_metrics_df[f'{category}_tn'].sum()
244
+ fp = all_categories_metrics_df[f'{category}_fp'].sum()
245
+ fn = all_categories_metrics_df[f'{category}_fn'].sum()
246
+
247
+ # 기본 메트릭 계산
248
+ metrics = {
249
+ 'tp': int(tp),
250
+ 'tn': int(tn),
251
+ 'fp': int(fp),
252
+ 'fn': int(fn),
253
+ 'accuracy': (tp + tn) / (tp + tn + fp + fn) if (tp + tn + fp + fn) > 0 else 0,
254
+ 'precision': tp / (tp + fp) if (tp + fp) > 0 else 0,
255
+ 'recall': tp / (tp + fn) if (tp + fn) > 0 else 0,
256
+ 'specificity': tn / (tn + fp) if (tn + fp) > 0 else 0,
257
+ 'f1': 2 * tp / (2 * tp + fp + fn) if (2 * tp + fp + fn) > 0 else 0,
258
+ }
259
+
260
+ # 추가 메트릭 계산
261
+ tpr = metrics['recall'] # TPR = recall
262
+ tnr = metrics['specificity'] # TNR = specificity
263
+
264
+ # Balanced Accuracy
265
+ metrics['balanced_accuracy'] = (tpr + tnr) / 2
266
+
267
+ # G-Mean
268
+ metrics['g_mean'] = np.sqrt(tpr * tnr) if (tpr * tnr) > 0 else 0
269
+
270
+ # MCC (Matthews Correlation Coefficient)
271
+ numerator = (tp * tn) - (fp * fn)
272
+ denominator = np.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))
273
+ metrics['mcc'] = numerator / denominator if denominator > 0 else 0
274
+
275
+ # NPV (Negative Predictive Value)
276
+ metrics['npv'] = tn / (tn + fn) if (tn + fn) > 0 else 0
277
+
278
+ # FAR (False Alarm Rate) = FPR = 1 - specificity
279
+ metrics['far'] = 1 - metrics['specificity']
280
+
281
+ accumulated_results[category] = metrics
282
+
283
+ # 전체 카테고리의 누적 값으로 계산
284
+ total_tp = sum(accumulated_results[cat]['tp'] for cat in categories)
285
+ total_tn = sum(accumulated_results[cat]['tn'] for cat in categories)
286
+ total_fp = sum(accumulated_results[cat]['fp'] for cat in categories)
287
+ total_fn = sum(accumulated_results[cat]['fn'] for cat in categories)
288
+
289
+ # micro average 계산 (전체 누적 값으로 계산)
290
+ accumulated_results["micro_avg"] = {
291
+ 'tp': int(total_tp),
292
+ 'tn': int(total_tn),
293
+ 'fp': int(total_fp),
294
+ 'fn': int(total_fn),
295
+ 'accuracy': (total_tp + total_tn) / (total_tp + total_tn + total_fp + total_fn),
296
+ 'precision': total_tp / (total_tp + total_fp) if (total_tp + total_fp) > 0 else 0,
297
+ 'recall': total_tp / (total_tp + total_fn) if (total_tp + total_fn) > 0 else 0,
298
+ 'f1': 2 * total_tp / (2 * total_tp + total_fp + total_fn) if (2 * total_tp + total_fp + total_fn) > 0 else 0,
299
+ # ... (다른 메트릭들도 동일한 방식으로 계산)
300
+ }
301
+
302
+ return accumulated_results
303
+ def _calculate_metrics(self, y_true: np.ndarray, y_pred: np.ndarray) -> Dict:
304
+ """성능 지표 계산"""
305
+ tn = np.sum((y_true == 0) & (y_pred == 0))
306
+ fp = np.sum((y_true == 0) & (y_pred == 1))
307
+ fn = np.sum((y_true == 1) & (y_pred == 0))
308
+ tp = np.sum((y_true == 1) & (y_pred == 1))
309
+
310
+ metrics = {
311
+ 'f1': f1_score(y_true, y_pred, zero_division=0),
312
+ 'accuracy': accuracy_score(y_true, y_pred),
313
+ 'precision': precision_score(y_true, y_pred, zero_division=0),
314
+ 'recall': recall_score(y_true, y_pred, zero_division=0),
315
+ 'specificity': tn / (tn + fp) if (tn + fp) > 0 else 0,
316
+ 'tp': int(tp),
317
+ 'tn': int(tn),
318
+ 'fp': int(fp),
319
+ 'fn': int(fn)
320
+ }
321
+
322
+ return metrics
pia_bench/pipe_line/piepline.py ADDED
@@ -0,0 +1,227 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# NOTE(review): the original import block bound SheetChecker, SheetManager and
# BenchChecker twice (the later binding silently shadowed the earlier one) and
# repeated the typing imports three times. Duplicates were removed, keeping
# the bindings that were effective at runtime
# (sheet_manager.sheet_checker.sheet_check.SheetChecker, etc.).
import logging
import os
from dataclasses import dataclass
from typing import Dict, List, Optional, Tuple

import numpy as np
from dotenv import load_dotenv

from pia_bench.bench import PiaBenchMark
from pia_bench.checker.bench_checker import BenchChecker
from pia_bench.event_alarm import EventDetector
from pia_bench.metric import MetricsEvaluator
from sheet_manager.sheet_checker.sheet_check import SheetChecker
from sheet_manager.sheet_crud.sheet_crud import SheetManager

# Load .env so ACCESS_TOKEN (read in BenchmarkPipeline.__init__) is available.
load_dotenv()

logging.basicConfig(level=logging.INFO)
21
+
22
@dataclass
class PipelineConfig:
    """Configuration values for one benchmark pipeline run."""
    # Name of the model to benchmark (also used as the sheet row key)
    model_name: str
    # Name of the benchmark dataset (e.g. "PIA")
    benchmark_name: str
    # Path to the prompt/threshold config JSON; its basename (without
    # extension) becomes the prompt-config identifier (cfg_prompt)
    cfg_target_path: str
    # Root directory that contains every benchmark dataset
    base_path: str = "/mnt/nas_192tb/videos/huggingface_benchmarks_dataset/Leaderboard_bench"
29
+
30
class BenchmarkPipelineStatus:
    """Tracks the state and results of a benchmark pipeline run."""

    def __init__(self):
        # (model_added, benchmark_exists) as reported by the sheet check
        self.sheet_status: Tuple[bool, bool] = (False, False)
        # per-check results of the local benchmark environment check
        self.bench_status: Dict[str, bool] = {}
        # summary string from the benchmark checker (e.g. "all_passed")
        self.bench_result: str = ""
        # name of the pipeline stage currently running / last reached
        self.current_stage: str = "not_started"

    def is_success(self) -> bool:
        """Whether the pipeline as a whole succeeded."""
        model_added, benchmark_exists = self.sheet_status
        # Success = model already known, benchmark present, all checks passed.
        return (not model_added) and benchmark_exists and self.bench_result == "all_passed"

    def __str__(self) -> str:
        parts = (
            f"Current Stage: {self.current_stage}",
            f"Sheet Status: {self.sheet_status}",
            f"Bench Status: {self.bench_status}",
            f"Bench Result: {self.bench_result}",
        )
        return "\n".join(parts)
49
+
50
class BenchmarkPipeline:
    """Orchestrates sheet checks, local benchmark checks and benchmark execution.

    Fixes: the benchmark path was hard-coded identically in three execution
    methods, silently ignoring config.base_path — it is now derived from the
    config; the duplicated PiaBenchMark / EventDetector / MetricsEvaluator
    construction was extracted into private helpers.
    """

    def __init__(self, config: PipelineConfig):
        self.config = config
        self.logger = logging.getLogger(self.__class__.__name__)
        self.status = BenchmarkPipelineStatus()
        # Access token for downloading benchmark assets (from .env).
        self.access_token = os.getenv("ACCESS_TOKEN")
        # Prompt-config identifier = config file basename without extension.
        self.cfg_prompt = os.path.splitext(os.path.basename(self.config.cfg_target_path))[0]

        # Initialize checkers
        self.sheet_manager = SheetManager()
        self.sheet_checker = SheetChecker(self.sheet_manager)
        self.bench_checker = BenchChecker(self.config.base_path)

        # Filled with the MetricsEvaluator result after execution.
        self.bench_result_dict = None

    def run(self) -> BenchmarkPipelineStatus:
        """Run the whole pipeline; always returns the (possibly failed) status."""
        try:
            self.status.current_stage = "sheet_check"
            proceed = self._check_sheet()

            if not proceed:
                self.status.current_stage = "completed_no_action_needed"
                self.logger.info("벤치마크가 이미 존재하여 추가 작업이 필요하지 않습니다.")
                return self.status

            self.status.current_stage = "bench_check"
            if not self._check_bench():
                return self.status

            self.status.current_stage = "execution"
            self._execute_based_on_status()

            self.status.current_stage = "completed"
            return self.status

        except Exception as e:
            self.logger.error(f"파이프라인 실행 중 에러 발생: {str(e)}")
            self.status.current_stage = "error"
            return self.status

    def _check_sheet(self) -> bool:
        """Check the Google sheet; returns True only when a benchmark run is needed."""
        self.logger.info("시트 상태 체크 시작")
        model_added, benchmark_exists = self.sheet_checker.check_model_and_benchmark(
            self.config.model_name,
            self.config.benchmark_name
        )
        self.status.sheet_status = (model_added, benchmark_exists)

        if model_added:
            self.logger.info("새로운 모델이 추가되었습니다")
        if not benchmark_exists:
            self.logger.info("벤치마크 측정이 필요합니다")
            return True  # proceed only when the benchmark is missing

        self.logger.info("이미 벤치마크가 존재합니다. 파이프라인을 종료합니다.")
        return False  # benchmark already recorded: stop here

    def _check_bench(self) -> bool:
        """Check the local benchmark environment; stores the summary in status."""
        self.logger.info("벤치마크 환경 체크 시작")
        self.status.bench_status = self.bench_checker.check_benchmark(
            self.config.benchmark_name,
            self.config.model_name,
            self.cfg_prompt
        )
        self.status.bench_result = self.bench_checker.get_benchmark_status(
            self.status.bench_status
        )

        # "no bench": the benchmark has never been run and the folder
        # structure does not exist.
        if self.status.bench_result == "no bench":
            self.logger.error("벤치마크 실행에 필요한 기본 폴더구조가 없습니다.")
            # NOTE(review): this branch also returns True (same as the success
            # path) — kept as-is to preserve behavior; _execute_based_on_status
            # falls back to vector generation for unknown states.
            return True

        return True  # proceed to execution

    def _create_benchmark(self) -> PiaBenchMark:
        """Build a PiaBenchMark rooted at the configured base path.

        Fix: previously the path was hard-coded in three places and
        config.base_path was ignored.
        """
        return PiaBenchMark(
            benchmark_path=os.path.join(self.config.base_path, self.config.benchmark_name),
            model_name=self.config.model_name,
            cfg_target_path=self.config.cfg_target_path,
            token=self.access_token,
        )

    def _run_event_detection(self, pia_benchmark) -> None:
        """Run event detection over extracted vectors and save alarm CSVs."""
        detector = EventDetector(
            config_path=self.config.cfg_target_path,
            model_name=self.config.model_name,
            token=pia_benchmark.token,
        )
        detector.process_and_save_predictions(
            pia_benchmark.vector_video_path,
            pia_benchmark.dataset_path,
            pia_benchmark.alram_path,
        )

    def _evaluate_metrics(self, pia_benchmark) -> None:
        """Evaluate predictions against labels; stores the result dict."""
        metric = MetricsEvaluator(
            pred_dir=pia_benchmark.alram_path,
            label_dir=pia_benchmark.dataset_path,
            save_dir=pia_benchmark.metric_path,
        )
        self.bench_result_dict = metric.evaluate()

    def _execute_based_on_status(self):
        """Dispatch to the right execution path for the checked bench state."""
        if self.status.bench_result == "all_passed":
            self._execute_full_pipeline()
        elif self.status.bench_result == "no_vectors":
            self._execute_vector_generation()
        elif self.status.bench_result == "no_metrics":
            self._execute_metrics_generation()
        else:
            self._execute_vector_generation()
            self.logger.warning("폴더구조가 없습니다")

    def _execute_full_pipeline(self):
        """Execution path when every prerequisite already exists."""
        self.logger.info("전체 파이프라인 실행 중...")
        pia_benchmark = self._create_benchmark()
        pia_benchmark.preprocess_structure()
        print("Categories identified:", pia_benchmark.categories)
        self._evaluate_metrics(pia_benchmark)

    def _execute_vector_generation(self):
        """Execution path when visual vectors must be (re)generated."""
        self.logger.info("벡터 생성 중...")
        pia_benchmark = self._create_benchmark()
        pia_benchmark.preprocess_structure()
        pia_benchmark.preprocess_label_to_csv()
        print("Categories identified:", pia_benchmark.categories)

        pia_benchmark.extract_visual_vector()
        self._run_event_detection(pia_benchmark)
        self._evaluate_metrics(pia_benchmark)

    def _execute_metrics_generation(self):
        """Execution path when vectors exist but metrics are missing."""
        self.logger.info("메트릭 생성 중...")
        pia_benchmark = self._create_benchmark()
        pia_benchmark.preprocess_structure()
        pia_benchmark.preprocess_label_to_csv()
        print("Categories identified:", pia_benchmark.categories)

        self._run_event_detection(pia_benchmark)
        self._evaluate_metrics(pia_benchmark)
211
+
212
+
213
if __name__ == "__main__":
    # Pipeline configuration for a single benchmark run.
    run_config = PipelineConfig(
        model_name="T2V_CLIP4CLIP_MSRVTT",
        benchmark_name="PIA",
        cfg_target_path="topk.json",
        base_path="/mnt/nas_192tb/videos/huggingface_benchmarks_dataset/Leaderboard_bench",
    )

    # Execute the pipeline and report its final status.
    final_status = BenchmarkPipeline(run_config).run()

    print(f"\n파이프라인 실행 결과:")
    print(str(final_status))
requirements.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ oauth2client
2
+ gspread
3
+ gradio
4
+ python-dotenv
5
+ APScheduler
6
+ black
7
+ gradio[oauth]
8
+ gradio_leaderboard==0.0.13
9
+ gradio_client
10
+ huggingface-hub>=0.18.0
11
+ matplotlib
12
+ numpy
13
+ pandas
14
+ python-dateutil
15
+ tqdm
sample.csv ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ video_name,resolution,video_duration,category,benchmark,duration_seconds,total_frames,file_format,file_size_mb,aspect_ratio,fps
2
+ 417-2_cam02_assault01_place03_night_spring.mp4,3840x2160,5:15,violence,PIA,315.14816666666667,9445,.mp4,351.66,1.78,29.97002997002997
3
+ 439-5_cam01_assault01_place03_day_summer.mp4,3840x2160,5:19,violence,PIA,318.6516666666667,9550,.mp4,342.09,1.78,29.97002997002997
4
+ 24-2_cam01_assault01_place09_night_winter.mp4,3840x2160,4:33,violence,PIA,272.7725,8175,.mp4,299.35,1.78,29.97002997002997
5
+ 6-1_cam01_assault01_place03_night_summer.mp4,3840x2160,5:19,violence,PIA,318.6516666666667,9550,.mp4,342.81,1.78,29.97002997002997
6
+ fight_0026.mp4,640x360,3:20,violence,PIA,199.93306666666666,5992,.mp4,16.19,1.78,29.97002997002997
7
+ 10-1_cam01_assault03_place07_night_winter.mp4,3840x2160,5:10,violence,PIA,310.07643333333334,9293,.mp4,333.36,1.78,29.97002997002997
8
+ 22-2_cam01_assault01_place07_night_winter.mp4,3840x2160,5:5,violence,PIA,305.305,9150,.mp4,368.69,1.78,29.97002997002997
9
+ 407-6_cam01_assault01_place04_day_winter.mp4,3840x2160,5:13,violence,PIA,312.77913333333333,9374,.mp4,747.29,1.78,29.97002997002997
10
+ 407-6_cam01_assault01_place04_day_spring.mp4,3840x2160,5:9,violence,PIA,309.10880000000003,9264,.mp4,738.48,1.78,29.97002997002997
11
+ 411-3_cam01_assault01_place08_night_winter.mp4,3840x2160,5:15,violence,PIA,314.7477666666667,9433,.mp4,337.82,1.78,29.97002997002997
12
+ 412-1_cam01_assault01_place09_night_winter.mp4,3840x2160,5:30,violence,PIA,330.26326666666665,9898,.mp4,380.09,1.78,29.97002997002997
13
+ 416-5_cam03_assault01_place02_night_spring.mp4,3840x2160,5:9,violence,PIA,308.97533333333337,9260,.mp4,342.81,1.78,29.97002997002997
14
+ 12-1_cam01_assault01_place09_day_summer.mp4,3840x2160,4:54,violence,PIA,294.02706666666666,8812,.mp4,712.79,1.78,29.97002997002997
15
+ 13-4_cam02_assault02_place08_day_spring.mp4,3840x2160,4:56,violence,PIA,295.96233333333333,8870,.mp4,711.64,1.78,29.97002997002997
16
+ 16-3_cam01_assault01_place02_night_summer.mp4,3840x2160,5:0,violence,PIA,300.3333666666667,9001,.mp4,358.8,1.78,29.97002997002997
17
+ 17-2_cam01_assault03_place03_night_spring.mp4,3840x2160,5:13,violence,PIA,312.8125,9375,.mp4,758.24,1.78,29.97002997002997
18
+ 2-3_cam01_assault01_place04_night_spring.mp4,3840x2160,5:12,violence,PIA,312.4454666666667,9364,.mp4,750.96,1.78,29.97002997002997
19
+ 20-3_cam02_assault01_place02_night_summer.mp4,3840x2160,5:14,violence,PIA,314.2139,9417,.mp4,450.17,1.78,29.97002997002997
20
+ 23-3_cam01_assault01_place02_night_summer.mp4,3840x2160,5:0,violence,PIA,300.3,9000,.mp4,358.84,1.78,29.97002997002997
21
+ 406-1_cam01_assault01_place03_day_summer.mp4,3840x2160,5:0,violence,PIA,300.3333666666667,9001,.mp4,428.29,1.78,29.97002997002997
22
+ 8-1_cam01_assault03_place05_day_spring.mp4,3840x2160,5:0,violence,PIA,300.3,9000,.mp4,717.83,1.78,29.97002997002997
23
+ fight_0035.mp4,406x720,1:47,violence,PIA,107.27383333333333,3215,.mp4,11.04,0.56,29.97002997002997
24
+ fight_0062.mp4,640x360,1:12,violence,PIA,71.70496666666666,2149,.mp4,3.05,1.78,29.97002997002997
25
+ fight_0051.mp4,1280x720,2:0,violence,PIA,120.43333333333334,3599,.mp4,30.26,1.78,29.88375311375588
26
+ fight_0097.mp4,1280x720,1:8,violence,PIA,68.00126666666667,2038,.mp4,6.47,1.78,29.97002997002997
27
+ fight_0125.mp4,1280x720,1:26,violence,PIA,86.01926666666667,2578,.mp4,9.09,1.78,29.97002997002997
28
+ fight_0141.mp4,1280x720,3:11,violence,PIA,191.42456666666666,5737,.mp4,19.21,1.78,29.97002997002997
29
+ fight_0147.mp4,640x360,2:35,violence,PIA,154.73333333333335,4624,.mp4,9.45,1.78,29.88367083153813
30
+ fight_0156.mp4,1280x720,1:16,violence,PIA,76.0,2280,.mp4,10.58,1.78,30.0
31
+ fight_0162.mp4,1280x720,1:8,violence,PIA,67.86666666666666,2036,.mp4,8.28,1.78,30.0
32
+ 20190102_013314A.mp4,3840x2160,15:0,fire,PIA,900.39,27013,.mp4,2477.09,1.78,30.001443818789635
33
+ 화재 - 불피우기.mp4,1920x1080,2:40,fire,PIA,159.535675,4786,.mp4,76.23,1.78,29.999559659618452
34
+ 화재 - 토치.mp4,1920x1080,8:44,fire,PIA,523.871349,15716,.mp4,249.98,1.78,29.999731861648346
35
+ Video34.mp4,292x240,15:1,fire,PIA,901.0485436893204,7734,.mp4,1.71,1.22,8.583333333333334
36
+ Video5.mp4,320x240,3:7,fire,PIA,187.33333333333334,4496,.mp4,4.82,1.33,24.0
37
+ Video49.mp4,320x240,1:9,fire,PIA,69.08333333333333,1658,.mp4,2.7,1.33,24.0
38
+ Video149.mp4,854x480,0:30,fire,PIA,29.996663329996665,899,.mp4,5.37,1.78,29.97
39
+ Video261.mp4,292x240,15:0,fire,PIA,900.1199999999999,7501,.mp4,4.09,1.22,8.333333333333334
40
+ fire_general-fire_rgb_0002_cctv1.mp4,1920x1080,0:7,fire,PIA,7.0,189,.mp4,5.58,1.78,27.0
41
+ fire_general-fire_rgb_0065_cctv4.mp4,1920x1080,0:9,fire,PIA,8.525191858525192,511,.mp4,47.03,1.78,59.94
42
+ fire_general-fire_rgb_0070_cctv3.mp4,1920x1080,0:12,fire,PIA,12.479145812479146,748,.mp4,73.64,1.78,59.94
43
+ fire_general-fire_rgb_0083_cctv2.mp4,1920x1080,0:10,fire,PIA,9.743076409743077,584,.mp4,55.54,1.78,59.94
44
+ fire_general-fire_rgb_0559_cctv1.mp4,1920x1080,0:10,fire,PIA,10.477143810477145,628,.mp4,81.13,1.78,59.94
45
+ fire_general-fire_rgb_0556_cctv2.mp4,1920x1080,0:9,fire,PIA,9.376042709376042,562,.mp4,65.8,1.78,59.94
46
+ fire_general-fire_rgb_0530_cctv1.mp4,1920x1080,0:12,fire,PIA,12.028695362028696,721,.mp4,87.26,1.78,59.94
47
+ fire_general-fire_rgb_0514_cctv2.mp4,1920x1080,0:8,fire,PIA,8.241574908241574,494,.mp4,59.32,1.78,59.94
48
+ fire_general-fire_rgb_0475_cctv1.mp4,1920x1080,0:9,fire,PIA,9.426092759426092,565,.mp4,73.66,1.78,59.94
49
+ fire_general-fire_rgb_0460_cctv2.mp4,1920x1080,0:11,fire,PIA,11.16116116116116,669,.mp4,80.93,1.78,59.94
50
+ fire_general-fire_rgb_0356_cctv1.mp4,1920x1080,0:9,fire,PIA,9.376042709376042,562,.mp4,79.11,1.78,59.94
51
+ fire_general-fire_rgb_0331_cctv2.mp4,1920x1080,0:7,fire,PIA,6.773440106773441,406,.mp4,52.11,1.78,59.94
52
+ fire_general-fire_rgb_0305_cctv3.mp4,1920x1080,0:10,fire,PIA,10.226893560226895,613,.mp4,84.28,1.78,59.94
53
+ fire_general-fire_rgb_0291_cctv1.mp4,1920x1080,0:4,fire,PIA,4.170837504170838,250,.mp4,22.08,1.78,59.94
54
+ fire_general-fire_rgb_0280_cctv4.mp4,1920x1080,0:4,fire,PIA,4.087420754087421,245,.mp4,21.92,1.78,59.94
55
+ fire_general-fire_rgb_0562_cctv7.mp4,1920x1080,0:7,fire,PIA,7.0,203,.mp4,7.62,1.78,29.0
56
+ fire_general-fire_rgb_0337_cctv2.mp4,1920x1080,0:7,fire,PIA,7.0,203,.mp4,5.37,1.78,29.0
57
+ fire_general-fire_rgb_0289_cctv2.mp4,1920x1080,0:7,fire,PIA,7.0,203,.mp4,7.74,1.78,29.0
58
+ fire_oil-fire_rgb_0002_cctv2.mp4,1920x1080,0:6,fire,PIA,6.0,180,.mp4,5.28,1.78,30.0
59
+ fire_oil-fire_rgb_0222_cctv4.mp4,1920x1080,0:6,fire,PIA,6.0,180,.mp4,6.37,1.78,30.0
60
+ fire_oil-fire_rgb_0445_cctv7.mp4,1920x1080,0:6,fire,PIA,6.0,174,.mp4,4.19,1.78,29.0
61
+ Explosion004_x264.mp4,320x240,1:3,fire,PIA,63.4,1902,.mp4,8.04,1.33,30.0
62
+ Explosion005_x264.mp4,320x240,0:23,fire,PIA,23.1,693,.mp4,5.46,1.33,30.0
63
+ Explosion009_x264.mp4,320x240,0:37,fire,PIA,36.7,1101,.mp4,9.13,1.33,30.0
64
+ Explosion010_x264.mp4,320x240,1:23,fire,PIA,83.26666666666667,2498,.mp4,11.58,1.33,30.0
65
+ Explosion013_x264.mp4,320x240,1:51,fire,PIA,110.56666666666666,3317,.mp4,15.66,1.33,30.0
66
+ Explosion014_x264.mp4,320x240,0:43,fire,PIA,43.06666666666667,1292,.mp4,6.24,1.33,30.0
67
+ Explosion017_x264.mp4,320x240,0:55,fire,PIA,54.766666666666666,1643,.mp4,12.31,1.33,30.0
68
+ Explosion002_x264.mp4,320x240,2:14,fire,PIA,133.76666666666668,4013,.mp4,18.32,1.33,30.0
69
+ Explosion051_x264.mp4,320x240,1:34,fire,PIA,94.0,2820,.mp4,13.52,1.33,30.0
70
+ 119-1_cam01_swoon01_place03_day_summer.mp4,3840x2160,4:60,falldown,PIA,299.9997,8991,.mp4,810.22,1.78,29.97002997002997
71
+ 100-5_cam02_swoon01_place02_day_summer.mp4,3840x2160,5:21,falldown,PIA,321.38773333333336,9632,.mp4,451.85,1.78,29.97002997002997
72
+ FILE210101-012606F.MOV,3840x2160,15:0,falldown,PIA,900.0324666666667,26974,.mov,3362.23,1.78,29.97002997002997
73
+ 118-2_cam01_swoon02_place10_day_spring.mp4,3840x2160,5:0,falldown,PIA,300.3,9000,.mp4,442.05,1.78,29.97002997002997
74
+ 108-5_cam02_swoon01_place06_night_spring.mp4,3840x2160,4:56,falldown,PIA,296.22926666666666,8878,.mp4,718.68,1.78,29.97002997002997
75
+ FILE210101-003713F.MOV,3840x2160,15:0,falldown,PIA,900.0324666666667,26974,.mov,3362.27,1.78,29.97002997002997
76
+ FILE210101-010727F.MOV,3840x2160,15:0,falldown,PIA,900.0324666666667,26974,.mov,3362.04,1.78,29.97002997002997
77
+ 245-5_cam02_swoon01_place04_night_spring.mp4,3840x2160,5:7,falldown,PIA,306.97333333333336,9200,.mp4,369.43,1.78,29.97002997002997
78
+ 115-1_cam01_swoon01_place02_night_spring.mp4,3840x2160,5:8,falldown,PIA,308.2746333333333,9239,.mp4,362.95,1.78,29.97002997002997
79
+ 110-2_cam01_swoon01_place01_day_spring.mp4,3840x2160,4:55,falldown,PIA,295.06143333333335,8843,.mp4,709.46,1.78,29.97002997002997
80
+ FILE210101-005228F.MOV,3840x2160,15:0,falldown,PIA,900.0324666666667,26974,.mov,3362.06,1.78,29.97002997002997
81
+ 114-2_cam01_swoon03_place03_day_spring.mp4,3840x2160,5:6,falldown,PIA,306.306,9180,.mp4,438.87,1.78,29.97002997002997
82
+ 117-1_cam02_swoon01_place04_night_spring.mp4,3840x2160,5:8,falldown,PIA,308.4081,9243,.mp4,747.54,1.78,29.97002997002997
83
+ 104-2_cam01_swoon01_place04_day_spring.mp4,3840x2160,5:6,falldown,PIA,305.7054,9162,.mp4,742.15,1.78,29.97002997002997
84
+ 103-1_cam01_swoon01_place04_day_spring.mp4,3840x2160,5:8,falldown,PIA,307.5072,9216,.mp4,746.19,1.78,29.97002997002997
85
+ 99-4_cam03_swoon01_place03_day_winter.mp4,3840x2160,5:7,falldown,PIA,306.6396666666667,9190,.mp4,329.27,1.78,29.97002997002997
86
+ 240-3_cam01_swoon01_place02_night_spring.mp4,3840x2160,5:8,falldown,PIA,308.04106666666667,9232,.mp4,330.92,1.78,29.97002997002997
87
+ 115-5_cam02_swoon01_place02_night_spring.mp4,3840x2160,5:10,falldown,PIA,309.57593333333335,9278,.mp4,750.26,1.78,29.97002997002997
88
+ 640-2_cam01_swoon01_place01_day_summer.mp4,3840x2160,5:0,falldown,PIA,300.3333666666667,9001,.mp4,425.3,1.78,29.97002997002997
89
+ 517-3_cam03_swoon03_place04_night_winter.mp4,3840x2160,5:7,falldown,PIA,306.9066,9198,.mp4,330.13,1.78,29.97002997002997
90
+ 107-1_cam01_swoon01_place06_night_spring.mp4,3840x2160,4:0,falldown,PIA,240.24,7200,.mp4,580.49,1.78,29.97002997002997
91
+ 106-1_cam02_swoon01_place05_day_spring.mp4,3840x2160,5:1,falldown,PIA,300.56693333333334,9008,.mp4,430.69,1.78,29.97002997002997
92
+ 110-1_cam01_swoon01_place01_day_spring.mp4,3840x2160,4:55,falldown,PIA,294.6276666666667,8830,.mp4,708.43,1.78,29.97002997002997
93
+ 116-4_cam02_swoon01_place03_day_summer.mp4,3840x2160,5:7,falldown,PIA,306.53956666666664,9187,.mp4,340.57,1.78,29.97002997002997
94
+ 109-5_cam02_swoon02_place01_night_summer.mp4,3840x2160,5:12,falldown,PIA,312.312,9360,.mp4,757.09,1.78,29.97002997002997
95
+ 105-5_cam01_swoon01_place05_night_summer.mp4,3840x2160,5:11,falldown,PIA,311.34436666666664,9331,.mp4,333.82,1.78,29.97002997002997
96
+ 112-6_cam01_swoon02_place01_night_summer.mp4,3840x2160,4:0,falldown,PIA,240.24,7200,.mp4,582.44,1.78,29.97002997002997
97
+ 108-2_cam01_swoon01_place06_night_summer.mp4,3840x2160,4:60,falldown,PIA,299.7327666666667,8983,.mp4,726.91,1.78,29.97002997002997
98
+ 120-1_cam02_swoon02_place06_day_summer.mp4,3840x2160,5:22,falldown,PIA,322.2219,9657,.mp4,781.36,1.78,29.97002997002997
99
+ 113-2_cam02_swoon01_place08_day_summer.mp4,3840x2160,5:14,falldown,PIA,313.7134,9402,.mp4,450.0,1.78,29.97002997002997
sheet_manager/sheet_checker/sheet_check.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, Tuple
2
+ import logging
3
+ import gspread
4
+ from sheet_manager.sheet_crud.sheet_crud import SheetManager
5
+
6
class SheetChecker:
    """Keep the "model" worksheet in sync: model rows and benchmark columns.

    Wraps the provided SheetManager and derives a second manager of the same
    concrete type bound to the "model" worksheet (keyed by "Model name").
    """

    def __init__(self, sheet_manager: SheetManager):
        """Initialize the checker and connect to the "model" worksheet.

        Args:
            sheet_manager: Manager for the primary worksheet; its spreadsheet
                URL and concrete class are reused for the "model" worksheet.
        """
        self.sheet_manager = sheet_manager
        self.bench_sheet_manager = None  # assigned in _init_bench_sheet()
        self.logger = logging.getLogger(__name__)
        self._init_bench_sheet()

    def _init_bench_sheet(self):
        """Create a sheet manager for the "model" worksheet."""
        # Instantiate the same concrete manager class the caller supplied.
        self.bench_sheet_manager = type(self.sheet_manager)(
            spreadsheet_url=self.sheet_manager.spreadsheet_url,
            worksheet_name="model",
            column_name="Model name"
        )

    def add_benchmark_column(self, column_name: str):
        """Add a benchmark column plus its "<name>*100" companion column.

        No-op when the column header already exists.

        Args:
            column_name: Header text of the benchmark column to create.

        Raises:
            Exception: Any error from the sheet API is logged and re-raised.
        """
        try:
            headers = self.bench_sheet_manager.get_available_columns()

            if column_name in headers:
                return  # already present

            # Write the new header into the first free column of row 1.
            new_col_index = len(headers) + 1
            cell = gspread.utils.rowcol_to_a1(1, new_col_index)
            self.bench_sheet_manager.sheet.update(cell, [[column_name]])

            # Companion column intended to hold the score scaled by 100.
            next_col_index = new_col_index + 1
            next_cell = gspread.utils.rowcol_to_a1(1, next_col_index)
            self.bench_sheet_manager.sheet.update(next_cell, [[f"{column_name}*100"]])

            self.logger.info(f"새로운 벤치마크 컬럼들 추가됨: {column_name}, {column_name}*100")
            # Reconnect so cached headers include the freshly added columns.
            self.bench_sheet_manager._connect_to_sheet(validate_column=False)

        except Exception as e:
            self.logger.error(f"벤치마크 컬럼 {column_name} 추가 중 오류 발생: {str(e)}")
            raise

    def check_model_and_benchmark(self, model_name: str, benchmark_name: str) -> Tuple[bool, bool]:
        """Ensure the model row and benchmark column exist, then report status.

        Args:
            model_name: Model to look up (its row is added when missing).
            benchmark_name: Benchmark to look up (its column is created when missing).

        Returns:
            Tuple[bool, bool]: (model was newly added, benchmark value already exists)
        """
        try:
            # Add the model row when it is not present yet.
            model_exists = self._check_model_exists(model_name)
            model_added = False
            if not model_exists:
                self._add_new_model(model_name)
                model_added = True
                self.logger.info(f"새로운 모델 추가됨: {model_name}")

            # Create the benchmark column when it is not present yet.
            available_columns = self.bench_sheet_manager.get_available_columns()
            if benchmark_name not in available_columns:
                self.add_benchmark_column(benchmark_name)
                self.logger.info(f"새로운 벤치마크 컬럼 추가됨: {benchmark_name}")

            # Finally, check whether the benchmark cell already holds a value.
            benchmark_exists = self._check_benchmark_exists(model_name, benchmark_name)

            return model_added, benchmark_exists

        except Exception as e:
            self.logger.error(f"모델/벤치마크 확인 중 오류 발생: {str(e)}")
            raise

    def _check_model_exists(self, model_name: str) -> bool:
        """Return True when the model name appears in the "Model name" column."""
        try:
            self.bench_sheet_manager.change_column("Model name")
            values = self.bench_sheet_manager.get_all_values()
            return model_name in values
        except Exception as e:
            self.logger.error(f"모델 존재 여부 확인 중 오류 발생: {str(e)}")
            raise

    def _add_new_model(self, model_name: str):
        """Append name, link, and HTML-link cells for a new model row."""
        try:
            model_info = {
                "Model name": model_name,
                "Model link": f"https://huggingface.co/PIA-SPACE-LAB/{model_name}",
                "Model": f'<a target="_blank" href="https://huggingface.co/PIA-SPACE-LAB/{model_name}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
            }

            for column_name, value in model_info.items():
                self.bench_sheet_manager.change_column(column_name)
                self.bench_sheet_manager.push(value)

        except Exception as e:
            self.logger.error(f"모델 정보 추가 중 오류 발생: {str(e)}")
            raise

    def _check_benchmark_exists(self, model_name: str, benchmark_name: str) -> bool:
        """Return True when the model's benchmark cell holds a non-empty value."""
        try:
            self.bench_sheet_manager.change_column("Model name")
            all_values = self.bench_sheet_manager.get_all_values()

            # BUGFIX: list.index() raised ValueError when the model row was
            # missing; treat a missing model as "no benchmark recorded".
            if model_name not in all_values:
                return False
            # +1 for the header row, +1 because sheet rows are 1-based.
            # NOTE(review): get_all_values() drops blank cells, so this mapping
            # assumes the column has no gaps above the model row — verify.
            row_index = all_values.index(model_name) + 2

            self.bench_sheet_manager.change_column(benchmark_name)
            value = self.bench_sheet_manager.sheet.cell(row_index, self.bench_sheet_manager.col_index).value

            return bool(value and value.strip())

        except Exception as e:
            self.logger.error(f"벤치마크 존재 여부 확인 중 오류 발생: {str(e)}")
            raise
128
+
129
# Usage example
if __name__ == "__main__":
    checker = SheetChecker(SheetManager())

    was_added, has_benchmark = checker.check_model_and_benchmark(
        model_name="test-model",
        benchmark_name="COCO",
    )

    print(f"Model added: {was_added}")
    print(f"Benchmark exists: {has_benchmark}")
sheet_manager/sheet_convert/json2sheet.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from sheet_manager.sheet_crud.sheet_crud import SheetManager
3
+ import json
4
+ from typing import Optional, Dict
5
+
6
def update_benchmark_json(
    model_name: str,
    benchmark_data: dict,
    worksheet_name: str = "metric",
    target_column: str = "benchmark"
):
    """Serialize benchmark data to JSON and store it on the model's row.

    Args:
        model_name (str): Model whose row should be updated.
        benchmark_data (dict): Benchmark payload to serialize.
        worksheet_name (str): Worksheet to operate on (default: "metric").
        target_column (str): Column receiving the JSON string (default: "benchmark").
    """
    manager = SheetManager(worksheet_name=worksheet_name)

    # Keep non-ASCII characters readable in the stored JSON.
    payload = json.dumps(benchmark_data, ensure_ascii=False)

    # Locate the row by model name and overwrite the target cell.
    matched_row = manager.update_cell_by_condition(
        condition_column="Model name",
        condition_value=model_name,
        target_column=target_column,
        target_value=payload,
    )

    if matched_row:
        print(f"Successfully updated {target_column} data for model: {model_name}")
    else:
        print(f"Model {model_name} not found in the sheet")
38
+
39
+
40
+
41
def get_benchmark_dict(
    model_name: str,
    worksheet_name: str = "metric",
    target_column: str = "benchmark",
    save_path: Optional[str] = None
    ) -> Dict:
    """Fetch a model's benchmark JSON from the sheet and return it as a dict.

    Args:
        model_name (str): Model to look up.
        worksheet_name (str): Worksheet to read from (default: "metric").
        target_column (str): Column holding the JSON string (default: "benchmark").
        save_path (str, optional): If given, also write the dict to this JSON file.

    Returns:
        Dict: Parsed benchmark data; {} when the model or data is missing
        or JSON parsing fails.
    """
    manager = SheetManager(worksheet_name=worksheet_name)

    try:
        rows = manager.sheet.get_all_records()

        # First row whose "Model name" matches wins.
        matched = next(
            (row for row in rows if row.get("Model name") == model_name),
            None
        )

        if not matched:
            print(f"Model {model_name} not found in the sheet")
            return {}

        raw_json = matched.get(target_column)

        if not raw_json:
            print(f"No data found in {target_column} for model: {model_name}")
            return {}

        benchmark = json.loads(raw_json)

        # Optionally persist the parsed dict for offline use.
        if save_path:
            with open(save_path, 'w', encoding='utf-8') as f:
                json.dump(benchmark, f, ensure_ascii=False, indent=2)
            print(f"Successfully saved dictionary to: {save_path}")

        return benchmark

    except json.JSONDecodeError:
        print(f"Failed to parse JSON data for model: {model_name}")
        return {}
    except Exception as e:
        print(f"Error occurred: {str(e)}")
        return {}
99
+
100
def str2json(json_str):
    """Parse a JSON-formatted string into a Python object.

    Args:
        json_str (str): JSON-formatted string.

    Returns:
        dict: Parsed JSON object, or None when parsing fails.
    """
    try:
        parsed = json.loads(json_str)
    except json.JSONDecodeError as decode_err:
        print(f"JSON Parsing Error: {decode_err}")
        return None
    except Exception as unexpected_err:
        print(f"Unexpected Error: {unexpected_err}")
        return None
    return parsed
sheet_manager/sheet_crud/create_col.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from oauth2client.service_account import ServiceAccountCredentials
3
+ import gspread
4
+ from huggingface_hub import HfApi
5
+ import os
6
+ from dotenv import load_dotenv
7
+ from enviroments.convert import get_json_from_env_var
8
+
9
+ load_dotenv()
10
+
11
def push_model_names_to_sheet(spreadsheet_url, sheet_name, access_token, organization):
    """
    Fetches model names from Hugging Face and updates a Google Sheet with the names, links, and HTML links.

    Credentials are read from the GOOGLE_CREDENTIALS environment variable
    (service-account JSON). Only models not already present in the sheet's
    "Model name" column are appended.

    Args:
        spreadsheet_url (str): URL of the Google Spreadsheet.
        sheet_name (str): Name of the sheet to update.
        access_token (str): Hugging Face access token.
        organization (str): Organization name on Hugging Face.
    """
    # Authorize Google Sheets API
    scope = ['https://spreadsheets.google.com/feeds',
             'https://www.googleapis.com/auth/drive']
    json_key_dict =get_json_from_env_var("GOOGLE_CREDENTIALS")
    credential = ServiceAccountCredentials.from_json_keyfile_dict(json_key_dict, scope)
    gc = gspread.authorize(credential)

    # Open the Google Spreadsheet
    doc = gc.open_by_url(spreadsheet_url)
    sheet = doc.worksheet(sheet_name)

    # Fetch existing data from the sheet
    existing_data = pd.DataFrame(sheet.get_all_records())

    # Fetch models from Hugging Face
    api = HfApi()
    models = api.list_models(author=organization, use_auth_token=access_token)

    # Extract model names, links, and HTML links
    # NOTE(review): modelId.split("/")[1] assumes every id is "org/name" — verify.
    model_details = [{
        "Model name": model.modelId.split("/")[1],
        "Model link": f"https://huggingface.co/{model.modelId}",
        "Model": f"<a target=\"_blank\" href=\"https://huggingface.co/{model.modelId}\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">{model.modelId}</a>"
    } for model in models]

    new_data_df = pd.DataFrame(model_details)

    # Check for duplicates and update only new model names
    if "Model name" in existing_data.columns:
        existing_model_names = existing_data["Model name"].tolist()
    else:
        existing_model_names = []

    new_data_df = new_data_df[~new_data_df["Model name"].isin(existing_model_names)]

    if not new_data_df.empty:
        # Append new model names, links, and HTML links to the existing data
        updated_data = pd.concat([existing_data, new_data_df], ignore_index=True)

        # Push updated data back to the sheet
        updated_data = updated_data.replace([float('inf'), float('-inf')], None)  # convert +/-Infinity to None
        updated_data = updated_data.fillna('')  # convert NaN to empty strings
        sheet.update([updated_data.columns.values.tolist()] + updated_data.values.tolist())
        print("New model names, links, and HTML links successfully added to Google Sheet.")
    else:
        print("No new model names to add.")
69
+ # Example usage
70
+ if __name__ == "__main__":
71
+ spreadsheet_url = os.getenv("SPREADSHEET_URL")
72
+ access_token = os.getenv("ACCESS_TOKEN")
73
+ sheet_name = "시트1"
74
+ organization = "PIA-SPACE-LAB"
75
+
76
+ push_model_names_to_sheet(spreadsheet_url, sheet_name, access_token, organization)
sheet_manager/sheet_crud/sheet_crud.py ADDED
@@ -0,0 +1,347 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from oauth2client.service_account import ServiceAccountCredentials
3
+ import gspread
4
+ from dotenv import load_dotenv
5
+ from enviroments.convert import get_json_from_env_var
6
+ from typing import Optional, List
7
+
8
+ load_dotenv(override=True)
9
+
10
class SheetManager:
    """CRUD wrapper around one column of one Google Sheets worksheet.

    Holds a live gspread connection plus the cached header row; all column
    operations go through `col_index`, resolved from `column_name`.
    """

    def __init__(self, spreadsheet_url: Optional[str] = None,
                 worksheet_name: str = "flag",
                 column_name: str = "huggingface_id"):
        """
        Initialize SheetManager with Google Sheets credentials and connection.

        Args:
            spreadsheet_url (str, optional): URL of the Google Spreadsheet.
                If None, takes from environment variable.
            worksheet_name (str): Name of the worksheet to operate on.
                Defaults to "flag".
            column_name (str): Name of the column to operate on.
                Defaults to "huggingface_id".

        Raises:
            ValueError: If no spreadsheet URL is available.
        """
        self.spreadsheet_url = spreadsheet_url or os.getenv("SPREADSHEET_URL")
        if not self.spreadsheet_url:
            raise ValueError("Spreadsheet URL not provided and not found in environment variables")

        self.worksheet_name = worksheet_name
        self.column_name = column_name

        # Initialize credentials and client
        self._init_google_client()

        # Initialize sheet connection
        self.doc = None
        self.sheet = None
        self.col_index = None
        self._connect_to_sheet(validate_column=True)

    def _init_google_client(self):
        """Initialize Google Sheets client with credentials from GOOGLE_CREDENTIALS."""
        scope = ['https://spreadsheets.google.com/feeds',
                 'https://www.googleapis.com/auth/drive']
        json_key_dict = get_json_from_env_var("GOOGLE_CREDENTIALS")
        credentials = ServiceAccountCredentials.from_json_keyfile_dict(json_key_dict, scope)
        self.client = gspread.authorize(credentials)

    def _connect_to_sheet(self, validate_column: bool = True):
        """
        Connect to the specified Google Sheet and initialize necessary attributes.

        Args:
            validate_column (bool): Whether to validate the column name exists

        Raises:
            ValueError: Worksheet or column cannot be resolved.
            ConnectionError: Any other failure while connecting.
        """
        try:
            self.doc = self.client.open_by_url(self.spreadsheet_url)

            # Try to get the worksheet
            try:
                self.sheet = self.doc.worksheet(self.worksheet_name)
            except gspread.exceptions.WorksheetNotFound:
                raise ValueError(f"Worksheet '{self.worksheet_name}' not found in spreadsheet")

            # Get headers (row 1 is always treated as the header row)
            self.headers = self.sheet.row_values(1)

            # Validate column only if requested
            if validate_column:
                try:
                    self.col_index = self.headers.index(self.column_name) + 1
                except ValueError:
                    # If column not found, fall back to the first available column
                    if self.headers:
                        self.column_name = self.headers[0]
                        self.col_index = 1
                        # NOTE(review): self.column_name was reassigned above, so this
                        # message prints the fallback name, not the missing one.
                        print(f"Column '{self.column_name}' not found. Using first available column: '{self.headers[0]}'")
                    else:
                        raise ValueError("No columns found in worksheet")

        except Exception as e:
            # Re-raise our own ValueErrors untouched; wrap everything else.
            if isinstance(e, ValueError):
                raise e
            raise ConnectionError(f"Failed to connect to sheet: {str(e)}")

    def change_worksheet(self, worksheet_name: str, column_name: Optional[str] = None):
        """
        Change the current worksheet and optionally the column.

        On any failure the previous worksheet/column state is restored.

        Args:
            worksheet_name (str): Name of the worksheet to switch to
            column_name (str, optional): Name of the column to switch to
        """
        # Remember current state so we can roll back on failure.
        old_worksheet = self.worksheet_name
        old_column = self.column_name

        try:
            self.worksheet_name = worksheet_name
            if column_name:
                self.column_name = column_name

            # First connect without column validation
            self._connect_to_sheet(validate_column=False)

            # Then validate the column if specified
            if column_name:
                self.change_column(column_name)
            else:
                # Validate existing column in new worksheet
                try:
                    self.col_index = self.headers.index(self.column_name) + 1
                except ValueError:
                    # If column not found, use first available column
                    if self.headers:
                        self.column_name = self.headers[0]
                        self.col_index = 1
                        print(f"Column '{old_column}' not found in new worksheet. Using first available column: '{self.headers[0]}'")
                    else:
                        raise ValueError("No columns found in worksheet")

            print(f"Successfully switched to worksheet: {worksheet_name}, using column: {self.column_name}")

        except Exception as e:
            # Restore previous state on error
            self.worksheet_name = old_worksheet
            self.column_name = old_column
            self._connect_to_sheet()
            raise e

    def change_column(self, column_name: str):
        """
        Change the target column.

        Args:
            column_name (str): Name of the column to switch to

        Raises:
            ValueError: If the column is not present in the header row.
        """
        # Refresh the header cache if it is empty.
        if not self.headers:
            self.headers = self.sheet.row_values(1)

        try:
            self.col_index = self.headers.index(column_name) + 1
            self.column_name = column_name
            print(f"Successfully switched to column: {column_name}")
        except ValueError:
            raise ValueError(f"Column '{column_name}' not found in worksheet. Available columns: {', '.join(self.headers)}")

    def get_available_worksheets(self) -> List[str]:
        """Get list of all available worksheets in the spreadsheet."""
        return [worksheet.title for worksheet in self.doc.worksheets()]

    def get_available_columns(self) -> List[str]:
        """Get list of all available columns in the current worksheet."""
        return self.headers if self.headers else self.sheet.row_values(1)

    def _reconnect_if_needed(self):
        """Reconnect to the sheet if the connection is lost (probe via row 1)."""
        try:
            self.sheet.row_values(1)
        except (gspread.exceptions.APIError, AttributeError):
            self._init_google_client()
            self._connect_to_sheet()

    def _fetch_column_data(self) -> List[str]:
        """Fetch all data from the configured column (header excluded)."""
        values = self.sheet.col_values(self.col_index)
        return values[1:]  # Exclude header

    def _update_sheet(self, data: List[str]):
        """Overwrite the configured column (below the header) with `data`."""
        try:
            # Prepare the range for update (excluding header)
            start_cell = gspread.utils.rowcol_to_a1(2, self.col_index)  # Start from row 2
            # NOTE(review): rows 2..len(data)+2 spans one row more than len(data)
            # values; len(data)+1 would be exact — confirm gspread pads quietly.
            end_cell = gspread.utils.rowcol_to_a1(len(data) + 2, self.col_index)
            range_name = f"{start_cell}:{end_cell}"

            # Convert data to 2D array format required by gspread
            cells = [[value] for value in data]

            # Update the range
            self.sheet.update(range_name, cells)
        except Exception as e:
            print(f"Error updating sheet: {str(e)}")
            raise

    def push(self, text: str) -> int:
        """
        Push a text value to the next empty cell in the configured column.

        Args:
            text (str): Text to push to the sheet

        Returns:
            int: The row number where the text was pushed
        """
        try:
            self._reconnect_if_needed()

            # Get all values in the configured column (index 0 is the header)
            column_values = self.sheet.col_values(self.col_index)

            # Find the next empty row
            next_row = None
            for i in range(1, len(column_values)):
                if not column_values[i].strip():
                    next_row = i + 1  # list index -> 1-based sheet row
                    break

            # If no empty row found, append to the end
            if next_row is None:
                next_row = len(column_values) + 1

            # Update the cell
            self.sheet.update_cell(next_row, self.col_index, text)
            print(f"Successfully pushed value: {text} to row {next_row}")
            return next_row

        except Exception as e:
            print(f"Error pushing to sheet: {str(e)}")
            raise

    def pop(self) -> Optional[str]:
        """Remove and return the topmost value (row 2) of the configured column.

        Returns:
            Optional[str]: The removed value, or None when the column is empty.
        """
        try:
            self._reconnect_if_needed()
            data = self._fetch_column_data()

            if not data or not data[0].strip():
                return None

            value = data.pop(0)  # Remove first value
            data.append("")  # Add empty string at the end to maintain sheet size

            self._update_sheet(data)
            print(f"Successfully popped value: {value}")
            return value

        except Exception as e:
            print(f"Error popping from sheet: {str(e)}")
            raise

    def delete(self, value: str) -> List[int]:
        """Delete all occurrences of a value from the configured column.

        Returns:
            List[int]: 1-based data-row indices (header excluded) that matched;
            empty when the value was not found.
        """
        try:
            self._reconnect_if_needed()
            data = self._fetch_column_data()

            # Find all indices before deletion (whitespace-insensitive match)
            indices = [i + 1 for i, v in enumerate(data) if v.strip() == value.strip()]
            if not indices:
                print(f"Value '{value}' not found in sheet")
                return []

            # Remove matching values and add empty strings at the end
            data = [v for v in data if v.strip() != value.strip()]
            data.extend([""] * len(indices))  # Add empty strings to maintain sheet size

            self._update_sheet(data)
            print(f"Successfully deleted value '{value}' from rows: {indices}")
            return indices

        except Exception as e:
            print(f"Error deleting from sheet: {str(e)}")
            raise

    def update_cell_by_condition(self, condition_column: str, condition_value: str, target_column: str, target_value: str) -> Optional[int]:
        """
        Update the value of a cell based on a condition in another column.

        Only the first matching row is updated.

        Args:
            condition_column (str): The column to check the condition on.
            condition_value (str): The value to match in the condition column.
            target_column (str): The column where the value should be updated.
            target_value (str): The new value to set in the target column.

        Returns:
            Optional[int]: The row number where the value was updated, or None if no matching row was found.

        Raises:
            ValueError: If either column name is missing from the header row.
        """
        try:
            self._reconnect_if_needed()

            # Get all column headers
            headers = self.sheet.row_values(1)

            # Find the indices for the condition and target columns
            try:
                condition_col_index = headers.index(condition_column) + 1
            except ValueError:
                raise ValueError(f"조건 칼럼 '{condition_column}'이(가) 없습니다.")

            try:
                target_col_index = headers.index(target_column) + 1
            except ValueError:
                raise ValueError(f"목표 칼럼 '{target_column}'이(가) 없습니다.")

            # Get all rows of data
            data = self.sheet.get_all_records()

            # Find the row that matches the condition
            for i, row in enumerate(data):
                if row.get(condition_column) == condition_value:
                    # Update the target column in the matching row
                    row_number = i + 2  # Row index starts at 2 (1 is header)
                    self.sheet.update_cell(row_number, target_col_index, target_value)
                    print(f"Updated row {row_number}: Set {target_column} to '{target_value}' where {condition_column} is '{condition_value}'")
                    return row_number

            print(f"조건에 맞는 행을 찾을 수 없습니다: {condition_column} = '{condition_value}'")
            return None

        except Exception as e:
            print(f"Error updating cell by condition: {str(e)}")
            raise

    def get_all_values(self) -> List[str]:
        """Get all non-empty values from the configured column (header excluded)."""
        self._reconnect_if_needed()
        return [v for v in self._fetch_column_data() if v.strip()]
318
+
319
# Example usage
if __name__ == "__main__":
    manager = SheetManager()

    # Demo: overwrite the "pia" cell on the row whose "model" column is "msr".
    updated_row = manager.update_cell_by_condition(
        condition_column="model",
        condition_value="msr",
        target_column="pia",
        target_value="new_value",
    )
347
+
sheet_manager/sheet_loader/sheet2df.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from oauth2client.service_account import ServiceAccountCredentials
3
+ import gspread
4
+ from dotenv import load_dotenv
5
+ import os
6
+ import json
7
+ from enviroments.convert import get_json_from_env_var
8
+ load_dotenv()
9
+
10
+ def sheet2df(sheet_name:str = "model"):
11
+ """
12
+ Reads data from a specified Google Spreadsheet and converts it into a Pandas DataFrame.
13
+
14
+ Steps:
15
+ 1. Authenticate using a service account JSON key.
16
+ 2. Open the spreadsheet by its URL.
17
+ 3. Select the worksheet to read.
18
+ 4. Convert the worksheet data to a Pandas DataFrame.
19
+ 5. Clean up the DataFrame:
20
+ - Rename columns using the first row of data.
21
+ - Drop the first row after renaming columns.
22
+
23
+ Returns:
24
+ pd.DataFrame: A Pandas DataFrame containing the cleaned data from the spreadsheet.
25
+
26
+ Note:
27
+ - The following variables must be configured before using this function:
28
+ - `json_key_path`: Path to the service account JSON key file.
29
+ - `spreadsheet_url`: URL of the Google Spreadsheet.
30
+ - `sheet_name`: Name of the worksheet to load.
31
+
32
+ Dependencies:
33
+ - pandas
34
+ - gspread
35
+ - oauth2client
36
+ """
37
+ scope = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive']
38
+ json_key_dict =get_json_from_env_var("GOOGLE_CREDENTIALS")
39
+ credential = ServiceAccountCredentials.from_json_keyfile_dict(json_key_dict, scope)
40
+ gc = gspread.authorize(credential)
41
+
42
+ spreadsheet_url = os.getenv("SPREADSHEET_URL")
43
+ doc = gc.open_by_url(spreadsheet_url)
44
+ sheet = doc.worksheet(sheet_name)
45
+
46
+ # Convert to DataFrame
47
+ df = pd.DataFrame(sheet.get_all_values())
48
+ # Clean DataFrame
49
+ df.rename(columns=df.iloc[0], inplace=True)
50
+ df.drop(df.index[0], inplace=True)
51
+
52
+ return df
sheet_manager/sheet_monitor/sheet_sync.py ADDED
@@ -0,0 +1,205 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import threading
2
+ import time
3
+ from typing import Optional, Callable
4
+ import logging
5
+
6
class SheetMonitor:
    def __init__(self, sheet_manager, check_interval: float = 1.0):
        """
        Watch a sheet manager's column for data on a background thread.

        Args:
            sheet_manager: Object exposing ``get_all_values()`` and ``column_name``.
            check_interval: Seconds between sheet polls.
        """
        self.sheet_manager = sheet_manager
        self.check_interval = check_interval

        # Threading control
        self.monitor_thread = None
        self.is_running = threading.Event()        # set while the loop should run
        self.pause_monitoring = threading.Event()  # set => the loop must idle
        self.monitor_paused = threading.Event()    # set by the loop once it is idle

        # Queue status: set whenever the sheet contains at least one value
        self.has_data = threading.Event()

        # Logging setup
        logging.basicConfig(level=logging.INFO)
        self.logger = logging.getLogger(__name__)

    def start_monitoring(self):
        """Start the monitoring thread (no-op if it is already alive)."""
        if self.monitor_thread is not None and self.monitor_thread.is_alive():
            self.logger.warning("Monitoring thread is already running")
            return

        self.is_running.set()
        self.pause_monitoring.clear()
        self.monitor_thread = threading.Thread(target=self._monitor_loop)
        self.monitor_thread.daemon = True
        self.monitor_thread.start()
        self.logger.info("Started monitoring thread")

    def stop_monitoring(self):
        """Stop the monitoring thread and wait for it to exit."""
        self.is_running.clear()
        # BUGFIX: unblock a paused loop so join() cannot hang forever.
        self.pause_monitoring.clear()
        if self.monitor_thread:
            self.monitor_thread.join()
        self.logger.info("Stopped monitoring thread")

    def pause(self):
        """Pause the monitoring and block until the loop is actually idle."""
        self.pause_monitoring.set()
        # Bounded wait so a stopped/dead monitor thread cannot deadlock callers.
        self.monitor_paused.wait(timeout=max(self.check_interval * 2, 1.0))
        self.logger.info("Monitoring paused")

    def resume(self):
        """Resume the monitoring and immediately re-check for data."""
        self.pause_monitoring.clear()
        self.monitor_paused.clear()
        # Perform an immediate check instead of waiting for the next poll.
        self.logger.info("Monitoring resumed, checking for new data...")
        values = self.sheet_manager.get_all_values()
        if values:
            self.has_data.set()
            self.logger.info(f"Found data after resume: {values}")

    def _monitor_loop(self):
        """Main monitoring loop that checks for data in sheet."""
        while self.is_running.is_set():
            if self.pause_monitoring.is_set():
                # BUGFIX: the original called pause_monitoring.wait() while the
                # event was already set, which returns immediately — so the loop
                # never actually paused and kept hitting the API. Idle here until
                # resume() clears the flag (or the monitor is stopped).
                self.monitor_paused.set()
                while self.pause_monitoring.is_set() and self.is_running.is_set():
                    time.sleep(0.05)
                self.monitor_paused.clear()
                continue

            try:
                # Check if there's any data in the sheet
                values = self.sheet_manager.get_all_values()
                self.logger.info(f"Monitoring: Current column={self.sheet_manager.column_name}, "
                                 f"Values found={len(values)}, "
                                 f"Has data={self.has_data.is_set()}")

                if values:  # If there's any non-empty value
                    self.has_data.set()
                    self.logger.info(f"Data detected: {values}")
                else:
                    self.has_data.clear()
                    self.logger.info("No data in sheet, waiting...")

                time.sleep(self.check_interval)

            except Exception as e:
                self.logger.error(f"Error in monitoring loop: {str(e)}")
                time.sleep(self.check_interval)
93
+
94
class MainLoop:
    def __init__(self, sheet_manager, sheet_monitor, callback_function: Callable = None):
        """
        Drive the processing loop: wait for queued work, pop it, run the callback.

        Args:
            sheet_manager: Object exposing pop(), change_column(), get_all_values()
                and column_name.
            sheet_monitor: SheetMonitor instance owning the has_data flag.
            callback_function: Optional callable invoked with
                (model_id, benchmark_name, prompt_cfg_name) for each job.
        """
        self.sheet_manager = sheet_manager
        self.monitor = sheet_monitor
        self.callback = callback_function
        self.is_running = threading.Event()
        self.logger = logging.getLogger(__name__)

    def start(self):
        """Start the main processing loop (blocks until stop() is called)."""
        self.is_running.set()
        self.monitor.start_monitoring()
        self._main_loop()

    def stop(self):
        """Stop the main processing loop and the monitor."""
        self.is_running.clear()
        self.monitor.stop_monitoring()

    def process_new_value(self):
        """Pop one job (model id, benchmark, prompt cfg) and run the callback.

        Returns:
            Tuple (model_id, benchmark_name, prompt_cfg_name) on success,
            None when the queue is empty or an error occurred.
        """
        # Remember the active column so it can be restored afterwards / on error.
        original_column = self.sheet_manager.column_name
        try:
            # Pop from huggingface_id column
            model_id = self.sheet_manager.pop()

            # BUGFIX: the original fell through to a return statement that
            # referenced benchmark_name/prompt_cfg_name even when model_id was
            # empty, raising NameError (silently logged as a generic error).
            if not model_id:
                return None

            # Pop from benchmark_name column
            self.sheet_manager.change_column("benchmark_name")
            benchmark_name = self.sheet_manager.pop()

            # Pop from prompt_cfg_name column
            self.sheet_manager.change_column("prompt_cfg_name")
            prompt_cfg_name = self.sheet_manager.pop()

            # Return to original column
            self.sheet_manager.change_column(original_column)

            self.logger.info(f"Processed values - model_id: {model_id}, "
                             f"benchmark_name: {benchmark_name}, "
                             f"prompt_cfg_name: {prompt_cfg_name}")

            if self.callback:
                # Pass all three values to callback
                self.callback(model_id, benchmark_name, prompt_cfg_name)

            return model_id, benchmark_name, prompt_cfg_name

        except Exception as e:
            self.logger.error(f"Error processing values: {str(e)}")
            # Best-effort: restore the original column after a failure.
            try:
                self.sheet_manager.change_column(original_column)
            except Exception:
                pass
            return None

    def _main_loop(self):
        """Main processing loop: wait for data, pause monitor, process, resume."""
        while self.is_running.is_set():
            # Wait (with timeout, so stop() is noticed) for data to be available.
            if self.monitor.has_data.wait(timeout=1.0):
                # Pause monitoring while we mutate the sheet.
                self.monitor.pause()

                # Process the value
                self.process_new_value()

                # Check if there's still data in the sheet
                values = self.sheet_manager.get_all_values()
                self.logger.info(f"After processing: Current column={self.sheet_manager.column_name}, "
                                 f"Values remaining={len(values)}")

                if not values:
                    self.monitor.has_data.clear()
                    self.logger.info("All data processed, clearing has_data flag")
                else:
                    self.logger.info(f"Remaining data: {values}")

                # Resume monitoring
                self.monitor.resume()
                # TODO: back off and retry when the Sheets API per-minute rate
                # limit causes a read to fail mid-loop.
182
+
183
+
184
# Example usage
if __name__ == "__main__":
    import sys
    from pathlib import Path
    # Make the project root importable when running this file directly.
    sys.path.append(str(Path(__file__).parent.parent.parent))
    from sheet_manager.sheet_crud.sheet_crud import SheetManager
    from pia_bench.pipe_line.piepline import PiaBenchMark
    def my_custom_function(huggingface_id, benchmark_name, prompt_cfg_name):
        # Run the PIA benchmark for one queued (model, benchmark, prompt-cfg) job.
        piabenchmark = PiaBenchMark(huggingface_id, benchmark_name, prompt_cfg_name)
        piabenchmark.bench_start()

    # Initialize components
    sheet_manager = SheetManager()
    monitor = SheetMonitor(sheet_manager, check_interval=10.0)
    main_loop = MainLoop(sheet_manager, monitor, callback_function=my_custom_function)

    try:
        # start() blocks inside the processing loop; the sleep loop below only
        # runs if start() ever returns, keeping the process alive until Ctrl-C.
        main_loop.start()
        while True:
            time.sleep(5)
    except KeyboardInterrupt:
        main_loop.stop()
topk.json ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "VIDEO_CFG": {
3
+ "window_size": 6,
4
+ "time_sampling": 15,
5
+ "tile_size": null
6
+ },
7
+ "MODEL_CFG": {
8
+ "_comment": "",
9
+ "_link": "",
10
+ "name": "assets/c7.pt",
11
+ "type": "clip4clip"
12
+ },
13
+ "PROMPT_CFG": [
14
+ {
15
+ "event": "falldown",
16
+ "top_candidates": 1,
17
+ "alert_threshold": 1,
18
+ "prompts": {
19
+ "normal": [
20
+ {
21
+ "sentence": "typical"
22
+ }
23
+ ],
24
+ "abnormal": [
25
+ {
26
+ "sentence": "falldown"
27
+ }
28
+ ]
29
+ }
30
+ },
31
+ {
32
+ "event": "violence",
33
+ "top_candidates": 1,
34
+ "alert_threshold": 1,
35
+ "prompts": {
36
+ "normal": [
37
+ {
38
+ "sentence": "normal"
39
+ },
40
+ {
41
+ "sentence": "average"
42
+ },
43
+ {
44
+ "sentence": "typical"
45
+ }
46
+ ],
47
+ "abnormal": [
48
+ {
49
+ "sentence": "violence with kicking and punching"
50
+ },
51
+ {
52
+ "sentence": "physical confrontation between people"
53
+ },
54
+ {
55
+ "sentence": "violence"
56
+ }
57
+ ]
58
+ }
59
+ },
60
+ {
61
+ "event": "fire",
62
+ "top_candidates": 1,
63
+ "alert_threshold": 1,
64
+ "prompts": {
65
+ "normal": [
66
+ {
67
+ "sentence": "tomato"
68
+ }
69
+ ],
70
+ "abnormal": [
71
+ {
72
+ "sentence": "fire"
73
+ },
74
+ {
75
+ "sentence": "video of be on fire with a stove"
76
+ },
77
+ {
78
+ "sentence": "a fire is burning"
79
+ },
80
+ {
81
+ "sentence": "embers are burning"
82
+ }
83
+ ]
84
+ }
85
+ }
86
+
87
+ ]
88
+ }
utils/bench_meta.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import cv2
3
+ import pandas as pd
4
+ import gradio as gr
5
+ import matplotlib.pyplot as plt
6
+ from utils.except_dir import cust_listdir
7
+ def get_video_metadata(video_path, category, benchmark):
8
+ """Extract metadata from a video file."""
9
+ cap = cv2.VideoCapture(video_path)
10
+
11
+ if not cap.isOpened():
12
+ return None
13
+ # Extract metadata
14
+ video_name = os.path.basename(video_path)
15
+ frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
16
+ fps = cap.get(cv2.CAP_PROP_FPS)
17
+ frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
18
+ frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
19
+ resolution = f"{frame_width}x{frame_height}"
20
+ duration_seconds = frame_count / fps if fps > 0 else 0
21
+ aspect_ratio = round(frame_width / frame_height, 2) if frame_height > 0 else 0
22
+ file_size = os.path.getsize(video_path) / (1024 * 1024) # MB
23
+ file_format = os.path.splitext(video_name)[1].lower()
24
+ cap.release()
25
+
26
+ return {
27
+ "video_name": video_name,
28
+ "resolution": resolution,
29
+ "video_duration": f"{duration_seconds // 60:.0f}:{duration_seconds % 60:.0f}",
30
+ "category": category,
31
+ "benchmark": benchmark,
32
+ "duration_seconds": duration_seconds,
33
+ "total_frames": frame_count,
34
+ "file_format": file_format,
35
+ "file_size_mb": round(file_size, 2),
36
+ "aspect_ratio": aspect_ratio,
37
+ "fps": fps
38
+ }
39
+
40
def process_videos_in_directory(root_dir):
    """Collect metadata for every video under ``root_dir``.

    Expected layout: root_dir/<benchmark>/dataset/<category>/<video files>.
    Unreadable videos (get_video_metadata returns None) are skipped.

    Args:
        root_dir: Root directory containing benchmark folders.

    Returns:
        pd.DataFrame with one row of metadata per readable video file.
    """
    video_metadata_list = []

    # Iterate over benchmark folders
    for benchmark in cust_listdir(root_dir):
        benchmark_path = os.path.join(root_dir, benchmark)
        if not os.path.isdir(benchmark_path):
            continue

        # Path of the "dataset" folder inside this benchmark
        dataset_path = os.path.join(benchmark_path, "dataset")
        if not os.path.isdir(dataset_path):
            continue

        # Iterate over category folders inside the dataset folder
        for category in cust_listdir(dataset_path):
            category_path = os.path.join(dataset_path, category)
            if not os.path.isdir(category_path):
                continue

            # Process the video files inside each category folder
            for file in cust_listdir(category_path):
                file_path = os.path.join(category_path, file)

                # BUGFIX: the original tuple contained 'MOV' (no leading dot),
                # which can never match a .lower()-cased path; '.mov' covers it.
                if file_path.lower().endswith(('.mp4', '.avi', '.mkv', '.mov')):
                    metadata = get_video_metadata(file_path, category, benchmark)
                    if metadata:
                        video_metadata_list.append(metadata)
    return pd.DataFrame(video_metadata_list)
72
+
utils/except_dir.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import List
3
+ import enviroments.config as config
4
+
5
def cust_listdir(directory: str) -> List[str]:
    """
    Like os.listdir, but omits the folders/files configured for exclusion.

    Args:
        directory (str): Path of the directory to scan.

    Returns:
        List[str]: Directory entries with everything in config.EXCLUDE_DIRS
        filtered out.
    """
    excluded = config.EXCLUDE_DIRS
    entries = os.listdir(directory)
    return [entry for entry in entries if entry not in excluded]
utils/hf_api.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from huggingface_hub import HfApi
2
+ from typing import Optional, List, Dict, Any
3
+ from dataclasses import dataclass
4
+
5
@dataclass
class ModelInfo:
    """Snapshot of one HuggingFace model's metadata."""
    model_id: str               # repo id, e.g. "org/model"
    last_modified: Any          # last-update timestamp from the API
    downloads: int              # download counter
    private: bool               # True when the repo is private
    attributes: Dict[str, Any]  # all public attributes of the raw API object
13
+
14
class HuggingFaceInfoManager:
    def __init__(self, access_token: Optional[str] = None, organization: str = "PIA-SPACE-LAB"):
        """
        Fetch and cache model metadata for one HuggingFace organization.

        Args:
            access_token (str, optional): HuggingFace access token (required).
            organization (str): Organization name (default: "PIA-SPACE-LAB").

        Raises:
            ValueError: If access_token is None.
        """
        if access_token is None:
            raise ValueError("액세스 토큰은 필수 입력값입니다. HuggingFace에서 발급받은 토큰을 입력해주세요.")

        self.api = HfApi()
        self.access_token = access_token
        self.organization = organization

        # Call the API once and cache the processed results.
        api_models = self.api.list_models(author=self.organization, use_auth_token=self.access_token)
        self._stored_models = []
        self._model_infos = []

        for model in api_models:
            # Capture every public attribute of the raw API object.
            model_attrs = {attr: getattr(model, attr)
                           for attr in dir(model) if not attr.startswith("_")}

            model_info = ModelInfo(
                model_id=model.modelId,
                last_modified=model.lastModified,
                downloads=model.downloads,
                private=model.private,
                attributes=model_attrs
            )
            self._model_infos.append(model_info)
            self._stored_models.append(model)

    @staticmethod
    def _info_to_dict(info) -> Dict[str, Any]:
        """Flatten one ModelInfo into a dict (base fields + raw attributes).

        Factored out because the original repeated this dict literal verbatim
        in get_model_info / get_private_models / get_public_models.
        """
        return {
            'model_id': info.model_id,
            'last_modified': info.last_modified,
            'downloads': info.downloads,
            'private': info.private,
            **info.attributes
        }

    def get_model_info(self) -> List[Dict[str, Any]]:
        """Return info dicts for every cached model."""
        return [self._info_to_dict(info) for info in self._model_infos]

    def get_model_ids(self) -> List[str]:
        """Return the ids of every cached model."""
        return [info.model_id for info in self._model_infos]

    def get_private_models(self) -> List[Dict[str, Any]]:
        """Return info dicts for the private models only."""
        return [self._info_to_dict(info) for info in self._model_infos if info.private]

    def get_public_models(self) -> List[Dict[str, Any]]:
        """Return info dicts for the public models only."""
        return [self._info_to_dict(info) for info in self._model_infos if not info.private]

    def refresh_models(self) -> None:
        """Refresh cached model info by performing a new API call."""
        # Re-run __init__ with the stored credentials to rebuild both caches.
        self.__init__(self.access_token, self.organization)
utils/parser.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from typing import Dict, List, Tuple
3
+
4
def load_config(config_path: str) -> Dict:
    """Read a JSON configuration file and return its contents as a dict."""
    with open(config_path, 'r', encoding='utf-8') as config_file:
        parsed = json.load(config_file)
    return parsed
8
+
9
class PromptManager:
    def __init__(self, config_path: str):
        self.config = load_config(config_path)
        self.sentences, self.index_mapping = self._extract_all_sentences_with_index()
        self.reverse_mapping = self._create_reverse_mapping()

    def _extract_all_sentences_with_index(self) -> Tuple[List[str], Dict]:
        """Collect every prompt sentence and an (event, status, prompt) index map."""
        collected = []
        mapping = {}

        events = self.config.get('PROMPT_CFG', [])
        for event_idx, event_config in enumerate(events):
            prompt_groups = event_config.get('prompts', {})
            for status in ('normal', 'abnormal'):
                for prompt_idx, prompt in enumerate(prompt_groups.get(status, [])):
                    text = prompt.get('sentence', '')
                    collected.append(text)
                    mapping[(event_idx, status, prompt_idx)] = text

        return collected, mapping

    def _create_reverse_mapping(self) -> Dict:
        """Build the sentence -> list-of-index-tuples reverse lookup."""
        reverse_map = {}
        for indices, sentence in self.index_mapping.items():
            reverse_map.setdefault(sentence, []).append(indices)
        return reverse_map

    def get_sentence_indices(self, sentence: str) -> List[Tuple[int, str, int]]:
        """Return every (event_idx, status, prompt_idx) where the sentence appears."""
        return self.reverse_mapping.get(sentence, [])

    def get_details_by_sentence(self, sentence: str) -> List[Dict]:
        """Return detailed info for every occurrence of the sentence."""
        occurrences = self.get_sentence_indices(sentence)
        return [self.get_details_by_index(*idx) for idx in occurrences]

    def get_details_by_index(self, event_idx: int, status: str, prompt_idx: int) -> Dict:
        """Return detailed info for one (event, status, prompt) position."""
        event_config = self.config['PROMPT_CFG'][event_idx]
        prompt = event_config['prompts'][status][prompt_idx]

        details = {
            'event': event_config['event'],
            'status': status,
            'sentence': prompt['sentence'],
            'top_candidates': event_config['top_candidates'],
            'alert_threshold': event_config['alert_threshold'],
            'event_idx': event_idx,
            'prompt_idx': prompt_idx
        }
        return details

    def get_all_sentences(self) -> List[str]:
        """Return every collected sentence (duplicates preserved, config order)."""
        return self.sentences