Spaces:
Sleeping
Sleeping
File size: 6,255 Bytes
50509cd d562907 c0191d4 668743e c0191d4 50509cd d562907 a56be8b c0191d4 668743e c0191d4 668743e d562907 c0191d4 668743e 0a7b185 668743e 0a7b185 b55935c 0a7b185 668743e 0a7b185 b55935c 668743e b55935c c0191d4 668743e c0191d4 b55935c 668743e 687eace c0191d4 76b45f4 c0191d4 d562907 c0191d4 0a7b185 c0191d4 0a7b185 b55935c 0a7b185 d562907 923b516 0a7b185 668743e b55935c 923b516 b55935c 0a7b185 b55935c e045c77 c0191d4 b55935c 0a7b185 b55935c d562907 b55935c 668743e b55935c 668743e b55935c 668743e b55935c e045c77 b55935c c0191d4 b55935c 923b516 668743e 923b516 c0191d4 668743e 687eace c0191d4 64862fd c0191d4 668743e b55935c 668743e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 |
import gradio as gr
import pandas as pd
import os
import hashlib
from datasets import Dataset, concatenate_datasets, load_dataset
from huggingface_hub import login
# ============================
# CONFIGURATION
# ============================
# Folder containing the clips to annotate. Its entries are listed once, at
# import time, and presented to annotators in sorted order.
AUDIO_FOLDER = "audio_files"
audio_files = sorted(os.listdir(AUDIO_FOLDER))
# SHA-256 hex digest the admin password must hash to (compared in admin_login).
ADMIN_PASSWORD_HASH = "eb32da077dfaf326cd6f73e0716b628da6427aa318a2d0b9fafa9ef315b5e885"
# ============================
# HF DATASET CONFIGURATION
# ============================
HF_DATASET_NAME = "MeysamSh/Annotate_Samples"  # replace with your HF dataset
HF_TOKEN = os.environ.get("HF_TOKEN")  # store your token as a secret in Spaces
# Calling login() without a token falls back to an interactive prompt, which
# nobody can answer on a headless Space. Only log in when a token is present.
if HF_TOKEN:
    login(HF_TOKEN)
# ============================
# USER FUNCTIONS
# ============================
def load_hf_dataset():
    """Return the "train" split of the annotation dataset, creating it if missing.

    If the dataset cannot be found, an empty dataset with the annotation
    columns (user_id, gender, audio_file, score) is pushed to the Hub as a
    private repo and returned.

    Returns:
        The loaded dataset, or the newly created empty one.

    Raises:
        Any loading error other than "not found" (network failures, auth
        problems, ...) propagates to the caller instead of being swallowed.
    """
    try:
        return load_dataset(HF_DATASET_NAME, split="train")
    except FileNotFoundError:
        # Only a missing dataset may trigger creation. Catching everything
        # here would push an empty dataset on *any* failure and could
        # overwrite annotations that already exist on the Hub.
        empty_frame = pd.DataFrame(
            columns=["user_id", "gender", "audio_file", "score"]
        )
        ds = Dataset.from_pandas(empty_frame)
        ds.push_to_hub(HF_DATASET_NAME, private=True)
        return ds
def submit_annotation_hf(user_id, gender, score, audio_file):
    """Add a single annotation record to the Hub dataset and push the result.

    The current dataset is fetched via load_hf_dataset, the new record is
    appended as one extra row, and the combined dataset is pushed back under
    HF_DATASET_NAME.
    """
    record = {
        "user_id": user_id,
        "gender": gender,
        "audio_file": audio_file,
        "score": score,
    }
    existing = load_hf_dataset()
    addition = Dataset.from_pandas(pd.DataFrame([record]))
    concatenate_datasets([existing, addition]).push_to_hub(HF_DATASET_NAME)
def load_next_audio(state):
    """Return the clip at the current index together with a reset play flag.

    Args:
        state: dict holding the current "index" into audio_files, or None
            (treated as index 0).

    Returns:
        A 4-tuple of (file path or None, state, {"played": 0}, status text).
        The path is None once the index is past the last audio file.
    """
    if state is None:
        state = {"index": 0}

    position = state["index"]
    unplayed = {"played": 0}

    if position < len(audio_files):
        filename = audio_files[position]
        clip_path = os.path.join(AUDIO_FOLDER, filename)
        return clip_path, state, unplayed, f"Loaded {filename}"

    return None, state, unplayed, "All audio files annotated."
def submit_annotation(user_id, gender, score, state):
    """Store the rating for the current clip and advance the index.

    Args:
        user_id: annotator identifier saved with the rating.
        gender: annotator gender saved with the rating.
        score: the selected score saved with the rating.
        state: dict holding the current "index" into audio_files, or None
            (treated as index 0).

    Returns:
        A pair of (state, status text). The state's index is incremented
        only when a rating was actually saved.
    """
    state = {"index": 0} if state is None else state
    position = state["index"]

    if position < len(audio_files):
        current_file = audio_files[position]
        submit_annotation_hf(user_id, gender, score, current_file)
        state["index"] = position + 1
        return state, f"Saved rating for {current_file}."

    return state, "No more audio files."
# ============================
# SUBMIT BUTTON CONTROL
# ============================
def check_submit_ready(user_id, audio_played, score):
    """Return a Gradio update toggling whether the submit button is clickable.

    The button is enabled only when all three hold: the stripped user ID is
    longer than one character, the play flag equals 1, and the score is not
    the placeholder "None". The checks run in that order and stop at the
    first one that fails.
    """
    if len(user_id.strip()) <= 1:
        return gr.update(interactive=False)
    if not audio_played["played"] == 1:
        return gr.update(interactive=False)
    return gr.update(interactive=score != "None")
def mark_audio_played():
    """Return a fresh play-flag dict with "played" set to 1."""
    return dict(played=1)
# ============================
# ADMIN FUNCTIONS
# ============================
def hash_password(password: str) -> str:
    """Return the SHA-256 hex digest of *password* (encoded with str.encode())."""
    hasher = hashlib.sha256()
    hasher.update(password.encode())
    return hasher.hexdigest()
def admin_login(input_password):
    """Check the admin password and, on success, expose the annotation results.

    Args:
        input_password: the password typed by the user; None is treated as
            an empty password instead of raising.

    Returns:
        A 4-tuple of (panel visibility update, results DataFrame or None,
        CSV path or None, status message update). On failure the panel is
        hidden and both data slots are None.
    """
    import hmac  # function-scope import: only this check needs it

    supplied_hash = hash_password(input_password or "")
    # compare_digest does not short-circuit on the first differing character,
    # so the comparison time does not reveal how much of the hash matched.
    if not hmac.compare_digest(supplied_hash, ADMIN_PASSWORD_HASH):
        return (
            gr.update(visible=False),
            None,
            None,
            gr.update(value="Admin authentication failed."),
        )

    df = load_hf_dataset().to_pandas()
    # For download: write the export CSV into the working directory.
    temp_csv = "annotations_export.csv"
    df.to_csv(temp_csv, index=False)
    return (
        gr.update(visible=True),
        df,
        temp_csv,
        gr.update(value="Admin authentication successful."),
    )
# ============================
# GRADIO UI
# ============================
with gr.Blocks() as demo:
    gr.Markdown("# Audio MOS Annotation Tool")
    # --------------------------
    # USER SECTION
    # --------------------------
    # State holders: the index into audio_files, and the "clip was played" flag.
    state = gr.State({"index": 0})
    audio_played = gr.State({"played": 0})
    with gr.Row():
        user_id = gr.Textbox(label="User ID")
        gender = gr.Dropdown(["Male", "Female", "Other"], label="Gender", value="Other")
    audio_player = gr.Audio(label="Audio File")
    with gr.Row():
        score = gr.Dropdown(choices=["None","1", "2", "3", "4", "5"], value="None", label="MOS Score (1–5)")
        submit_btn = gr.Button("Submit Score", interactive=False)
    status = gr.Textbox(label="Status", interactive=False)
    # Load first audio
    demo.load(
        load_next_audio,
        inputs=state,
        outputs=[audio_player, state, audio_played, status],
    )
    # Mark audio as played
    audio_player.play(
        mark_audio_played,
        None,
        audio_played,
    )
    # Enable submit button only when conditions are met: re-evaluate whenever
    # any of the three inputs changes.
    for trigger in (user_id, audio_played, score):
        trigger.change(
            check_submit_ready,
            inputs=[user_id, audio_played, score],
            outputs=submit_btn,
        )
    # Save annotation, THEN load next audio. The two steps are chained so
    # load_next_audio always sees the index advanced by submit_annotation;
    # two independent click listeners would start together and race on state.
    submit_btn.click(
        submit_annotation,
        inputs=[user_id, gender, score, state],
        outputs=[state, status],
    ).then(
        load_next_audio,
        inputs=state,
        outputs=[audio_player, state, audio_played, status],
    )
    # ============================
    # ADMIN DASHBOARD
    # ============================
    gr.Markdown("## Admin Dashboard (Restricted Access)")
    with gr.Row():
        admin_password = gr.Textbox(label="Admin Password", type="password")
        admin_login_btn = gr.Button("Login")
    login_status = gr.Textbox(label="Login Status:", interactive=False)
    with gr.Column(visible=False) as admin_panel:
        gr.Markdown("### Annotation Results")
        # Start empty. visible=False only hides the panel in the UI and is
        # not an access control, so the annotation data must not be loaded
        # into these components before a successful login. admin_login fills
        # both on success. This also avoids a Hub round-trip at build time.
        results_table = gr.DataFrame(interactive=False)
        download_admin = gr.File(label="Download annotations.csv")
    admin_login_btn.click(
        admin_login,
        inputs=admin_password,
        outputs=[admin_panel, results_table, download_admin, login_status],
    )
# ============================
# APP LAUNCH
# ============================
# Start the Gradio app only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch()
|