Spaces:
Running
Running
Commit ·
4051497
1
Parent(s): 5bbdf68
update code for new USL Editor app
Browse files- Functions/caption_editor_functions.py +34 -6
- Functions/video_player_functions.py +15 -1
- README.md +1 -1
- Resources/localization.py +4 -2
- app.py +42 -18
Functions/caption_editor_functions.py
CHANGED
|
@@ -3,24 +3,52 @@ from Functions.db_connection import default_app
|
|
| 3 |
from Resources.localization import get_string
|
| 4 |
|
| 5 |
|
| 6 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
try:
|
| 8 |
data = df.copy()
|
| 9 |
-
|
|
|
|
|
|
|
| 10 |
df_json = data.to_dict(orient="index")
|
| 11 |
default_app.database().child("video_captions").child(video_id).child("captions").set(df_json)
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
| 13 |
return get_string("save_successful")
|
| 14 |
except Exception as e:
|
| 15 |
return f"{get_string('save_failed')} {str(e)}"
|
| 16 |
|
| 17 |
|
|
|
|
| 18 |
def request_captions_by_video_id(video_id):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
response = default_app.database().child("video_captions").child(video_id).child("captions").get().val()
|
| 20 |
if response is None:
|
| 21 |
-
captions = pd.DataFrame(columns=["end_time", "start_time", "text"])
|
| 22 |
else:
|
| 23 |
captions = pd.DataFrame(response)
|
| 24 |
-
|
| 25 |
-
|
|
|
|
|
|
|
| 26 |
return captions_edit
|
|
|
|
| 3 |
from Resources.localization import get_string
|
| 4 |
|
| 5 |
|
| 6 |
+
# ADC-IMPLEMENTS: <gc-transform-caption-df-01>
|
| 7 |
+
def save_captions_to_db(df, video_id, user, video_pointer):
    """Write a video's caption table to Firebase and record the editing user.

    Args:
        df: pandas DataFrame whose four columns are, in order, start, text,
            end and aligned. Columns are renamed by position, so their
            incoming labels do not matter but their order does.
        video_id: Key under ``video_captions`` the captions are stored at.
        user: Value written to ``videos/{video_pointer}/assigned_to``.
        video_pointer: Key of the video under ``videos``; it is converted
            with ``str()`` before being used as a path segment.

    Returns:
        The localized ``save_successful`` string when both writes succeed,
        otherwise the localized ``save_failed`` string followed by the
        exception text.
    """
    firebase_fields = ['start_time', 'text', 'end_time', 'aligned']
    try:
        # Work on a copy so the caller's DataFrame keeps its own labels.
        renamed = df.copy()
        renamed.columns = firebase_fields
        captions_payload = renamed.to_dict(orient="index")

        captions_node = (
            default_app.database()
            .child("video_captions")
            .child(video_id)
            .child("captions")
        )
        captions_node.set(captions_payload)

        # Auto-assign: the user who saved becomes the video's assignee.
        assignee_node = (
            default_app.database()
            .child("videos")
            .child(str(video_pointer))
            .child("assigned_to")
        )
        assignee_node.set(user)

        return get_string("save_successful")
    except Exception as e:
        # Best-effort boundary: report the failure as a message to the UI.
        return f"{get_string('save_failed')} {str(e)}"
|
| 35 |
|
| 36 |
|
| 37 |
+
# ADC-IMPLEMENTS: <gc-transform-firebase-df-01>
|
| 38 |
def request_captions_by_video_id(video_id):
    """Read a video's captions from Firebase as a 4-column DataFrame.

    Args:
        video_id: Key under ``video_captions`` whose ``captions`` node is read.

    Returns:
        A pandas DataFrame with columns ``Start``, ``Text``, ``End`` and
        ``Aligned`` (in that order). The frame is empty when nothing is
        stored. When the stored rows carry no ``aligned`` field at all, the
        ``Aligned`` column is filled with ``False``.

    Raises:
        KeyError: If stored rows exist but lack one of the ``start_time``,
            ``text`` or ``end_time`` fields.
    """
    firebase_fields = ['start_time', 'text', 'end_time', 'aligned']
    response = default_app.database().child("video_captions").child(video_id).child("captions").get().val()

    # Normalize the payload to a list of row dicts. The Realtime Database
    # hands back a list only when the keys look like a dense 0..n-1 array;
    # otherwise it is a mapping of key -> row, which pd.DataFrame would
    # interpret column-wise (transposed). Sparse arrays contain None holes.
    if response is None:
        rows = []
    elif isinstance(response, dict):
        rows = list(response.values())
    else:
        rows = list(response)
    rows = [row for row in rows if row is not None]

    if rows:
        captions = pd.DataFrame(rows)
    else:
        captions = pd.DataFrame(columns=firebase_fields)

    # Captions stored without an aligned field count as not aligned.
    if 'aligned' not in captions.columns:
        captions['aligned'] = False

    # Select in display order, then switch to the UI column labels.
    captions_edit = captions[firebase_fields]
    captions_edit.columns = ["Start", "Text", "End", "Aligned"]
    return captions_edit
|
Functions/video_player_functions.py
CHANGED
|
@@ -15,6 +15,8 @@ def youtube_link_to_id(link):
|
|
| 15 |
try:
|
| 16 |
from urllib.parse import urlparse, parse_qs
|
| 17 |
parsed = urlparse(link)
|
|
|
|
|
|
|
| 18 |
return parse_qs(parsed.query)['v'][0]
|
| 19 |
except (KeyError, IndexError):
|
| 20 |
raise ValueError(f"Invalid YouTube URL: {link}")
|
|
@@ -25,9 +27,21 @@ def get_video_embed_by_id(video_id):
|
|
| 25 |
return video_id
|
| 26 |
|
| 27 |
|
|
|
|
| 28 |
def get_video_link_by_pointer(pointer, show_incomplete_only):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
video = default_app.database().child("videos").child(str(pointer)).get().val()
|
| 30 |
-
if video
|
|
|
|
|
|
|
| 31 |
return None
|
| 32 |
return video["url"]
|
| 33 |
|
|
|
|
| 15 |
try:
|
| 16 |
from urllib.parse import urlparse, parse_qs
|
| 17 |
parsed = urlparse(link)
|
| 18 |
+
if parsed.hostname in ('youtu.be',):
|
| 19 |
+
return parsed.path.lstrip('/')
|
| 20 |
return parse_qs(parsed.query)['v'][0]
|
| 21 |
except (KeyError, IndexError):
|
| 22 |
raise ValueError(f"Invalid YouTube URL: {link}")
|
|
|
|
| 27 |
return video_id
|
| 28 |
|
| 29 |
|
| 30 |
+
# ADC-IMPLEMENTS: <gc-feature-assignment-01>
|
| 31 |
def get_video_link_by_pointer(pointer, show_incomplete_only):
    """Look up the URL of the video stored at ``videos/{pointer}``.

    Args:
        pointer: Key of the video under ``videos``; converted with ``str()``.
        show_incomplete_only: When truthy, a video whose ``complete`` field
            is truthy is treated as filtered out.

    Returns:
        The record's ``url`` value, or ``None`` when no record exists at
        that key or the record is filtered out as complete.
    """
    record = default_app.database().child("videos").child(str(pointer)).get().val()

    # Short-circuit keeps record.get() from running on a missing record.
    if record is None or (show_incomplete_only and record.get("complete", False)):
        return None
    return record["url"]
|
| 47 |
|
README.md
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
emoji: 🌖
|
| 4 |
colorFrom: blue
|
| 5 |
colorTo: red
|
|
|
|
| 1 |
---
|
| 2 |
+
title: USL-Editor
|
| 3 |
emoji: 🌖
|
| 4 |
colorFrom: blue
|
| 5 |
colorTo: red
|
Resources/localization.py
CHANGED
|
@@ -21,6 +21,7 @@ STRINGS = {
|
|
| 21 |
"header_start": "Start",
|
| 22 |
"header_text": "Text",
|
| 23 |
"header_end": "End",
|
|
|
|
| 24 |
|
| 25 |
# Edit form
|
| 26 |
"edit_caption_title": "Edit Caption Entry",
|
|
@@ -58,7 +59,7 @@ STRINGS = {
|
|
| 58 |
"error": "Error:",
|
| 59 |
"all_videos_transcribed": "Save cancelled: All videos transcribed",
|
| 60 |
"change_video_completion_status_success": "Video completion status successfully changed",
|
| 61 |
-
"show_incomplete_only_change": "List of accessible videos successfully changed"
|
| 62 |
},
|
| 63 |
"uk": {
|
| 64 |
# Header and login
|
|
@@ -77,6 +78,7 @@ STRINGS = {
|
|
| 77 |
"header_start": "Початок",
|
| 78 |
"header_text": "Текст",
|
| 79 |
"header_end": "Кінець",
|
|
|
|
| 80 |
|
| 81 |
# Edit form
|
| 82 |
"edit_caption_title": "Редагувати субтитр",
|
|
@@ -114,7 +116,7 @@ STRINGS = {
|
|
| 114 |
"error": "Помилка:",
|
| 115 |
"all_videos_transcribed": "Відміна збереження: Усі відео розмічено",
|
| 116 |
"change_video_completion_status_success": "Статус відео успішно змінено",
|
| 117 |
-
"show_incomplete_only_change": "Список доступних відео успішно змінено"
|
| 118 |
}
|
| 119 |
}
|
| 120 |
|
|
|
|
| 21 |
"header_start": "Start",
|
| 22 |
"header_text": "Text",
|
| 23 |
"header_end": "End",
|
| 24 |
+
"header_aligned": "Aligned",
|
| 25 |
|
| 26 |
# Edit form
|
| 27 |
"edit_caption_title": "Edit Caption Entry",
|
|
|
|
| 59 |
"error": "Error:",
|
| 60 |
"all_videos_transcribed": "Save cancelled: All videos transcribed",
|
| 61 |
"change_video_completion_status_success": "Video completion status successfully changed",
|
| 62 |
+
"show_incomplete_only_change": "List of accessible videos successfully changed",
|
| 63 |
},
|
| 64 |
"uk": {
|
| 65 |
# Header and login
|
|
|
|
| 78 |
"header_start": "Початок",
|
| 79 |
"header_text": "Текст",
|
| 80 |
"header_end": "Кінець",
|
| 81 |
+
"header_aligned": "Вирівняно",
|
| 82 |
|
| 83 |
# Edit form
|
| 84 |
"edit_caption_title": "Редагувати субтитр",
|
|
|
|
| 116 |
"error": "Помилка:",
|
| 117 |
"all_videos_transcribed": "Відміна збереження: Усі відео розмічено",
|
| 118 |
"change_video_completion_status_success": "Статус відео успішно змінено",
|
| 119 |
+
"show_incomplete_only_change": "Список доступних відео успішно змінено",
|
| 120 |
}
|
| 121 |
}
|
| 122 |
|
app.py
CHANGED
|
@@ -10,7 +10,7 @@ from Resources.localization import get_string
|
|
| 10 |
next_video_pointer = 0
|
| 11 |
user = "anonymous_user"
|
| 12 |
n_videos = get_number_of_videos()
|
| 13 |
-
placeholder_link = "https://www.youtube.com/watch?v=
|
| 14 |
|
| 15 |
|
| 16 |
def get_username(profile: gr.OAuthProfile):
|
|
@@ -38,8 +38,16 @@ def on_row_select(df, evt: gr.SelectData):
|
|
| 38 |
return gr.update(value=0.0), gr.update(value=""), gr.update(value=0.0), -1, get_string("save_entry_button")
|
| 39 |
|
| 40 |
|
|
|
|
| 41 |
def save_entry(df, start_time, text, end_time, selected_row_idx, video_id):
|
| 42 |
-
"""Save or update a caption entry
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
if user == "anonymous_user":
|
| 44 |
return df, gr.Warning(get_string("please_sign_in"))
|
| 45 |
if next_video_pointer == -1:
|
|
@@ -60,19 +68,24 @@ def save_entry(df, start_time, text, end_time, selected_row_idx, video_id):
|
|
| 60 |
new_row = pd.DataFrame({
|
| 61 |
'Start': [start_time],
|
| 62 |
'Text': [text.strip()],
|
| 63 |
-
'End': [end_time]
|
|
|
|
| 64 |
})
|
| 65 |
df_copy = pd.concat([df_copy, new_row], ignore_index=True)
|
| 66 |
-
# Sort by start time
|
| 67 |
df_copy = df_copy.sort_values('Start').reset_index(drop=True)
|
| 68 |
else: # Updating existing entry
|
| 69 |
if 0 <= selected_row_idx < len(df_copy):
|
| 70 |
-
df_copy.
|
| 71 |
-
|
|
|
|
|
|
|
| 72 |
df_copy = df_copy.sort_values('Start').reset_index(drop=True)
|
| 73 |
|
| 74 |
-
#
|
| 75 |
-
|
|
|
|
|
|
|
|
|
|
| 76 |
|
| 77 |
return (
|
| 78 |
df_copy,
|
|
@@ -131,13 +144,24 @@ def change_completion_status(completion_status):
|
|
| 131 |
return gr.Error(f"{get_string('error')} {str(e)}")
|
| 132 |
|
| 133 |
|
|
|
|
| 134 |
def get_next_components(show_incomplete_only):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 135 |
global next_video_pointer
|
|
|
|
| 136 |
if next_video_pointer != -1:
|
| 137 |
next_video_link = get_video_link_by_pointer(next_video_pointer, show_incomplete_only)
|
| 138 |
next_video_pointer = (next_video_pointer + 1) % n_videos
|
| 139 |
|
| 140 |
-
for
|
| 141 |
if next_video_link is not None:
|
| 142 |
break
|
| 143 |
next_video_link = get_video_link_by_pointer(next_video_pointer, show_incomplete_only)
|
|
@@ -148,10 +172,10 @@ def get_next_components(show_incomplete_only):
|
|
| 148 |
|
| 149 |
try:
|
| 150 |
next_video_id = youtube_link_to_id(next_video_link)
|
| 151 |
-
|
| 152 |
-
return
|
| 153 |
-
except (ValueError, Exception)
|
| 154 |
-
empty_captions = pd.DataFrame(columns=["Start", "Text", "End"])
|
| 155 |
return empty_captions, "error"
|
| 156 |
|
| 157 |
|
|
@@ -226,10 +250,10 @@ with gr.Blocks(css=css, head=yt_init_js, fill_width=True) as main_page:
|
|
| 226 |
caption_editor = gr.DataFrame(
|
| 227 |
interactive=False,
|
| 228 |
elem_id="tbl",
|
| 229 |
-
datatype=["number", "str", "number"],
|
| 230 |
-
col_count=(
|
| 231 |
-
column_widths=["
|
| 232 |
-
headers=[get_string("header_start"), get_string("header_text"), get_string("header_end")],
|
| 233 |
wrap=True
|
| 234 |
)
|
| 235 |
|
|
@@ -255,7 +279,7 @@ with gr.Blocks(css=css, head=yt_init_js, fill_width=True) as main_page:
|
|
| 255 |
|
| 256 |
next_video_button.click(
|
| 257 |
fn=get_next_components,
|
| 258 |
-
inputs=show_incomplete_only_checkbox,
|
| 259 |
outputs=[caption_editor, current_video_id]
|
| 260 |
)
|
| 261 |
next_video_button.click(
|
|
|
|
| 10 |
next_video_pointer = 0
|
| 11 |
user = "anonymous_user"
|
| 12 |
n_videos = get_number_of_videos()
|
| 13 |
+
placeholder_link = "https://www.youtube.com/watch?v=d37lwXaSjs4"
|
| 14 |
|
| 15 |
|
| 16 |
def get_username(profile: gr.OAuthProfile):
|
|
|
|
| 38 |
return gr.update(value=0.0), gr.update(value=""), gr.update(value=0.0), -1, get_string("save_entry_button")
|
| 39 |
|
| 40 |
|
| 41 |
+
# ADC-IMPLEMENTS: <gc-feature-aligned-ui-01>
|
| 42 |
def save_entry(df, start_time, text, end_time, selected_row_idx, video_id):
|
| 43 |
+
"""Save or update a caption entry with per-entry alignment tracking.
|
| 44 |
+
|
| 45 |
+
Works directly with the 4-column DataFrame (Start, Text, End, Aligned).
|
| 46 |
+
Sets aligned=True ONLY for the specific row being added or updated.
|
| 47 |
+
All other rows retain their existing Aligned values from the DataFrame.
|
| 48 |
+
The current video pointer is computed from global state and passed to
|
| 49 |
+
save_captions_to_db for auto-assignment.
|
| 50 |
+
"""
|
| 51 |
if user == "anonymous_user":
|
| 52 |
return df, gr.Warning(get_string("please_sign_in"))
|
| 53 |
if next_video_pointer == -1:
|
|
|
|
| 68 |
new_row = pd.DataFrame({
|
| 69 |
'Start': [start_time],
|
| 70 |
'Text': [text.strip()],
|
| 71 |
+
'End': [end_time],
|
| 72 |
+
'Aligned': [True] # New entry gets aligned=True
|
| 73 |
})
|
| 74 |
df_copy = pd.concat([df_copy, new_row], ignore_index=True)
|
|
|
|
| 75 |
df_copy = df_copy.sort_values('Start').reset_index(drop=True)
|
| 76 |
else: # Updating existing entry
|
| 77 |
if 0 <= selected_row_idx < len(df_copy):
|
| 78 |
+
df_copy.at[selected_row_idx, 'Start'] = start_time
|
| 79 |
+
df_copy.at[selected_row_idx, 'Text'] = text.strip()
|
| 80 |
+
df_copy.at[selected_row_idx, 'End'] = end_time
|
| 81 |
+
df_copy.at[selected_row_idx, 'Aligned'] = True # Only this row gets aligned=True
|
| 82 |
df_copy = df_copy.sort_values('Start').reset_index(drop=True)
|
| 83 |
|
| 84 |
+
# Compute the current video's pointer index from global state.
|
| 85 |
+
# This is the same formula used by change_completion_status.
|
| 86 |
+
current_pointer = (next_video_pointer + n_videos - 1) % n_videos
|
| 87 |
+
# save_captions_to_db receives the 4-col DF directly
|
| 88 |
+
save_result = save_captions_to_db(df_copy, video_id, user, current_pointer)
|
| 89 |
|
| 90 |
return (
|
| 91 |
df_copy,
|
|
|
|
| 144 |
return gr.Error(f"{get_string('error')} {str(e)}")
|
| 145 |
|
| 146 |
|
| 147 |
+
# ADC-IMPLEMENTS: <gc-feature-assignment-01>
|
| 148 |
def get_next_components(show_incomplete_only):
|
| 149 |
+
"""Get the next video and its captions as a 4-column DataFrame.
|
| 150 |
+
|
| 151 |
+
Returns:
|
| 152 |
+
captions: 4-column DataFrame [Start, Text, End, Aligned] for the UI
|
| 153 |
+
next_video_id: YouTube video ID string
|
| 154 |
+
|
| 155 |
+
Args:
|
| 156 |
+
show_incomplete_only: If True, skip videos marked as complete
|
| 157 |
+
"""
|
| 158 |
global next_video_pointer
|
| 159 |
+
next_video_link = placeholder_link
|
| 160 |
if next_video_pointer != -1:
|
| 161 |
next_video_link = get_video_link_by_pointer(next_video_pointer, show_incomplete_only)
|
| 162 |
next_video_pointer = (next_video_pointer + 1) % n_videos
|
| 163 |
|
| 164 |
+
for _ in range(n_videos + 1):
|
| 165 |
if next_video_link is not None:
|
| 166 |
break
|
| 167 |
next_video_link = get_video_link_by_pointer(next_video_pointer, show_incomplete_only)
|
|
|
|
| 172 |
|
| 173 |
try:
|
| 174 |
next_video_id = youtube_link_to_id(next_video_link)
|
| 175 |
+
captions = request_captions_by_video_id(next_video_id)
|
| 176 |
+
return captions, next_video_id
|
| 177 |
+
except (ValueError, Exception):
|
| 178 |
+
empty_captions = pd.DataFrame(columns=["Start", "Text", "End", "Aligned"])
|
| 179 |
return empty_captions, "error"
|
| 180 |
|
| 181 |
|
|
|
|
| 250 |
caption_editor = gr.DataFrame(
|
| 251 |
interactive=False,
|
| 252 |
elem_id="tbl",
|
| 253 |
+
datatype=["number", "str", "number", "bool"],
|
| 254 |
+
col_count=(4, "fixed"),
|
| 255 |
+
column_widths=["12%", "60%", "12%", "16%"],
|
| 256 |
+
headers=[get_string("header_start"), get_string("header_text"), get_string("header_end"), get_string("header_aligned")],
|
| 257 |
wrap=True
|
| 258 |
)
|
| 259 |
|
|
|
|
| 279 |
|
| 280 |
next_video_button.click(
|
| 281 |
fn=get_next_components,
|
| 282 |
+
inputs=[show_incomplete_only_checkbox],
|
| 283 |
outputs=[caption_editor, current_video_id]
|
| 284 |
)
|
| 285 |
next_video_button.click(
|