Second
Browse files
app.py
CHANGED
|
@@ -4,19 +4,58 @@ import pandas as pd
|
|
| 4 |
# Set page configuration
|
| 5 |
st.set_page_config(page_title="Cyber Benchmark Hub: SECQA Leaderboard", layout="wide")
|
| 6 |
|
| 7 |
-
# Main Title
|
| 8 |
st.title("Cyber Benchmark Hub: SECQA Leaderboard")
|
| 9 |
-
st.markdown("## Powered by **Priam Cyber AI**")
|
| 10 |
st.markdown("#### [View the SECQA Dataset](https://huggingface.co/datasets/zefang-liu/secqa)")
|
| 11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
# Function to load and clean CSV data
|
| 13 |
@st.cache_data
|
| 14 |
def load_data(file_path):
|
| 15 |
df = pd.read_csv(file_path)
|
| 16 |
-
|
| 17 |
# Remove any unnamed columns (caused by trailing commas)
|
| 18 |
df = df.loc[:, ~df.columns.str.contains('Unnamed', na=False)]
|
| 19 |
-
|
| 20 |
# Standardize column names
|
| 21 |
df.columns = df.columns.str.strip()
|
| 22 |
df.rename(columns={
|
|
@@ -25,35 +64,23 @@ def load_data(file_path):
|
|
| 25 |
"v1 metric": "V1 Accuracy",
|
| 26 |
"v2 metric": "V2 Accuracy"
|
| 27 |
}, inplace=True)
|
| 28 |
-
|
| 29 |
# Convert percentage strings to floats (e.g., "100%" → 1.0)
|
| 30 |
for col in ["V1 Accuracy", "V2 Accuracy"]:
|
| 31 |
df[col] = df[col].astype(str).str.replace("%", "").str.strip()
|
| 32 |
df[col] = pd.to_numeric(df[col], errors='coerce') / 100
|
| 33 |
-
|
| 34 |
return df
|
| 35 |
|
| 36 |
# Load dataset
|
| 37 |
-
file_path = "Benchmark.csv" # Ensure this file is uploaded in your Hugging Face Space
|
| 38 |
df = load_data(file_path)
|
| 39 |
|
| 40 |
-
#
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
source_filter = st.multiselect(
|
| 47 |
-
"Select Model Type",
|
| 48 |
-
options=df["Type"].unique().tolist(),
|
| 49 |
-
default=df["Type"].unique().tolist()
|
| 50 |
-
)
|
| 51 |
-
st.markdown("---")
|
| 52 |
-
st.header("Test Parameters")
|
| 53 |
-
test_params = pd.DataFrame({
|
| 54 |
-
"Value": [0, 1, 0, 1, 0]
|
| 55 |
-
}, index=["Temperature", "n", "Presence Penalty", "Top_p", "Frequency Penalty"])
|
| 56 |
-
st.table(test_params)
|
| 57 |
|
| 58 |
# Apply filtering based on the sidebar selections
|
| 59 |
df_filtered = df[df["Type"].isin(source_filter)] if source_filter else df
|
|
@@ -70,7 +97,7 @@ df_filtered.insert(0, "Rank", range(1, len(df_filtered) + 1))
|
|
| 70 |
col1, col2 = st.columns([2, 1])
|
| 71 |
|
| 72 |
with col1:
|
| 73 |
-
st.subheader(f"Leaderboard for
|
| 74 |
st.dataframe(df_filtered.reset_index(drop=True))
|
| 75 |
|
| 76 |
with col2:
|
|
|
|
| 4 |
# The browser-tab title and the on-page heading use the same text.
PAGE_TITLE = "Cyber Benchmark Hub: SECQA Leaderboard"

# Configure the page (wide layout)
st.set_page_config(page_title=PAGE_TITLE, layout="wide")

# Main heading, followed by a link to the dataset
st.title(PAGE_TITLE)
st.markdown("#### [View the SECQA Dataset](https://huggingface.co/datasets/zefang-liu/secqa)")
|
| 10 |
|
| 11 |
+
# Sidebar: branding, dataset selection, filters, and the fixed test parameters.
# NOTE(review): the nesting under `with st.sidebar:` is reconstructed from a
# view that had lost its indentation — confirm against the real file.
with st.sidebar:
    # Logo and website link
    logo_url = "https://cdn.prod.website-files.com/630f558f2a15ca1e88a2f774/631f1436ad7a0605fecc5e15_Logo.svg"
    st.image(logo_url, use_container_width=True)
    st.markdown("[Priam.ai](https://www.priam.ai/)")
    st.divider()

    # Datasets offered under each category; the category list shown in the
    # first selectbox is the mapping's keys, in insertion order.
    datasets_by_category = {
        "Multiple Choice": ["secQA"],
        "Open Question": ["Testing..."],
        "Steps (Reasoning)": ["Testing..."],
    }
    dataset_categories = list(datasets_by_category)

    # Top-level choice: dataset category
    selected_category = st.selectbox(
        "Select Dataset Category",
        dataset_categories,
        index=0,
    )

    # Second choice: a dataset within the selected category
    available_datasets = datasets_by_category[selected_category]
    dataset_choice = st.selectbox("Select Dataset", available_datasets, index=0)

    st.divider()
    st.header("Filters & Options")
    dataset_version = st.radio("Select Dataset Version", ["v1", "v2"])

    # The model-type filter needs the values found in the CSV, which is read
    # further down the script. Reserve its slot here so it can be filled in
    # once the data is loaded.
    source_filter_placeholder = st.empty()

    st.markdown("---")
    st.header("Test Parameters")
    # One "Value" column, indexed by parameter name.
    test_params = pd.Series(
        {
            "Temperature": 0,
            "n": 1,
            "Presence Penalty": 0,
            "Top_p": 1,
            "Frequency Penalty": 0,
        },
        name="Value",
    ).to_frame()
    st.table(test_params)
|
| 43 |
+
|
| 44 |
+
# Choose the CSV that backs the selected dataset.
# Only "secQA" has an entry so far; any other choice falls back to the same
# file (placeholder: add entries here as result files for future datasets
# become available).
default_benchmark_file = "Benchmark.csv"  # Ensure this file is uploaded in your Hugging Face Space
dataset_files = {
    "secQA": default_benchmark_file,
}
file_path = dataset_files.get(dataset_choice, default_benchmark_file)
|
| 50 |
+
|
| 51 |
# Function to load and clean CSV data
|
| 52 |
@st.cache_data
|
| 53 |
def load_data(file_path):
|
| 54 |
df = pd.read_csv(file_path)
|
| 55 |
+
|
| 56 |
# Remove any unnamed columns (caused by trailing commas)
|
| 57 |
df = df.loc[:, ~df.columns.str.contains('Unnamed', na=False)]
|
| 58 |
+
|
| 59 |
# Standardize column names
|
| 60 |
df.columns = df.columns.str.strip()
|
| 61 |
df.rename(columns={
|
|
|
|
| 64 |
"v1 metric": "V1 Accuracy",
|
| 65 |
"v2 metric": "V2 Accuracy"
|
| 66 |
}, inplace=True)
|
| 67 |
+
|
| 68 |
# Convert percentage strings to floats (e.g., "100%" → 1.0)
|
| 69 |
for col in ["V1 Accuracy", "V2 Accuracy"]:
|
| 70 |
df[col] = df[col].astype(str).str.replace("%", "").str.strip()
|
| 71 |
df[col] = pd.to_numeric(df[col], errors='coerce') / 100
|
| 72 |
+
|
| 73 |
return df
|
| 74 |
|
| 75 |
# Load dataset
|
|
|
|
| 76 |
df = load_data(file_path)
|
| 77 |
|
| 78 |
+
# Fill the reserved placeholder with the model-type filter now that the data
# is available. Every distinct "Type" value is offered and pre-selected.
model_types = df["Type"].unique().tolist()
source_filter = source_filter_placeholder.multiselect(
    "Select Model Type",
    options=model_types,
    default=model_types,
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
|
| 85 |
# Keep only rows whose "Type" is among the selected model types; an empty
# selection means no filtering at all.
if source_filter:
    df_filtered = df[df["Type"].isin(source_filter)]
else:
    df_filtered = df
|
|
|
|
| 97 |
# Two columns with relative widths 2:1.
col1, col2 = st.columns([2, 1])

with col1:
    # Heading names the chosen dataset (upper-cased) and dataset version.
    leaderboard_title = f"Leaderboard for {dataset_choice.upper()} Version {dataset_version}"
    st.subheader(leaderboard_title)
    # Show the table with a fresh 0-based index, discarding the old one.
    leaderboard_view = df_filtered.reset_index(drop=True)
    st.dataframe(leaderboard_view)
|
| 102 |
|
| 103 |
with col2:
|