lex-sobieski committed on
Commit
4051497
·
1 Parent(s): 5bbdf68

update code for new USL Editor app

Browse files
Functions/caption_editor_functions.py CHANGED
@@ -3,24 +3,52 @@ from Functions.db_connection import default_app
3
  from Resources.localization import get_string
4
 
5
 
6
- def save_captions_to_db(df, video_id, user):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  try:
8
  data = df.copy()
9
- data.columns = ['start_time', 'text', 'end_time']
 
 
10
  df_json = data.to_dict(orient="index")
11
  default_app.database().child("video_captions").child(video_id).child("captions").set(df_json)
12
- default_app.database().child("video_captions").child(video_id).child("username").set(user)
 
 
 
13
  return get_string("save_successful")
14
  except Exception as e:
15
  return f"{get_string('save_failed')} {str(e)}"
16
 
17
 
 
18
  def request_captions_by_video_id(video_id):
 
 
 
 
 
 
19
  response = default_app.database().child("video_captions").child(video_id).child("captions").get().val()
20
  if response is None:
21
- captions = pd.DataFrame(columns=["end_time", "start_time", "text"])
22
  else:
23
  captions = pd.DataFrame(response)
24
- captions_edit = captions[['start_time', 'text', 'end_time']]
25
- captions_edit.columns = ["Start", "Text", "End"]
 
 
26
  return captions_edit
 
3
  from Resources.localization import get_string
4
 
5
 
6
# ADC-IMPLEMENTS: <gc-transform-caption-df-01>
def save_captions_to_db(df, video_id, user, video_pointer):
    """Persist a caption table to Firebase and record who is editing the video.

    Args:
        df: pandas DataFrame with exactly four columns in the order
            Start, Text, End, Aligned. Columns are relabelled by position,
            so the order matters and the header text does not.
        video_id: YouTube video ID; used as the key under ``video_captions``.
        user: Username written to the video's ``assigned_to`` field.
        video_pointer: Index of the video under ``videos``; converted to a
            string to form the database path.

    Returns:
        The localized "save_successful" string on success, otherwise the
        localized "save_failed" string followed by the exception text.
    """
    firebase_fields = ['start_time', 'text', 'end_time', 'aligned']
    try:
        # set_axis hands back a relabelled frame, leaving the caller's df as-is.
        relabelled = df.set_axis(firebase_fields, axis=1)
        payload = relabelled.to_dict(orient="index")

        captions_node = (
            default_app.database()
            .child("video_captions")
            .child(video_id)
            .child("captions")
        )
        captions_node.set(payload)

        # Auto-assign: the caller supplies the pointer, so no lookup is needed.
        assignee_node = (
            default_app.database()
            .child("videos")
            .child(str(video_pointer))
            .child("assigned_to")
        )
        assignee_node.set(user)
        return get_string("save_successful")
    except Exception as err:
        return f"{get_string('save_failed')} {str(err)}"
35
 
36
 
37
# ADC-IMPLEMENTS: <gc-transform-firebase-df-01>
def request_captions_by_video_id(video_id):
    """Load a video's captions from Firebase as a display-ready DataFrame.

    Args:
        video_id: Key of the video under ``video_captions``.

    Returns:
        A DataFrame whose columns are Start, Text, End, Aligned (in that
        order). It is empty when nothing is stored for the video. When the
        stored data carries no ``aligned`` field at all, the Aligned column
        is filled with False.
    """
    # Firebase field name -> column label shown in the UI, in display order.
    ui_labels = {
        'start_time': "Start",
        'text': "Text",
        'end_time': "End",
        'aligned': "Aligned",
    }

    stored = default_app.database().child("video_captions").child(video_id).child("captions").get().val()
    if stored is not None:
        table = pd.DataFrame(stored)
    else:
        table = pd.DataFrame(columns=["end_time", "start_time", "text", "aligned"])

    # Captions saved before the alignment flag existed have no such field.
    if 'aligned' not in table.columns:
        table['aligned'] = False

    return table[list(ui_labels)].rename(columns=ui_labels)
Functions/video_player_functions.py CHANGED
@@ -15,6 +15,8 @@ def youtube_link_to_id(link):
15
  try:
16
  from urllib.parse import urlparse, parse_qs
17
  parsed = urlparse(link)
 
 
18
  return parse_qs(parsed.query)['v'][0]
19
  except (KeyError, IndexError):
20
  raise ValueError(f"Invalid YouTube URL: {link}")
@@ -25,9 +27,21 @@ def get_video_embed_by_id(video_id):
25
  return video_id
26
 
27
 
 
28
  def get_video_link_by_pointer(pointer, show_incomplete_only):
 
 
 
 
 
 
 
 
 
29
  video = default_app.database().child("videos").child(str(pointer)).get().val()
30
- if video["complete"] and show_incomplete_only:
 
 
31
  return None
32
  return video["url"]
33
 
 
15
  try:
16
  from urllib.parse import urlparse, parse_qs
17
  parsed = urlparse(link)
18
+ if parsed.hostname in ('youtu.be',):
19
+ return parsed.path.lstrip('/')
20
  return parse_qs(parsed.query)['v'][0]
21
  except (KeyError, IndexError):
22
  raise ValueError(f"Invalid YouTube URL: {link}")
 
27
  return video_id
28
 
29
 
30
# ADC-IMPLEMENTS: <gc-feature-assignment-01>
def get_video_link_by_pointer(pointer, show_incomplete_only):
    """Look up a video's URL by its index, optionally hiding completed videos.

    Args:
        pointer: Index of the video under ``videos``; converted to a string
            to form the database path.
        show_incomplete_only: If True, videos whose ``complete`` flag is
            truthy are filtered out.

    Returns:
        The video's URL, or None when there is no record at ``pointer``,
        the record is filtered out as complete, or the record has no
        ``url`` field.
    """
    video = default_app.database().child("videos").child(str(pointer)).get().val()
    if video is None:
        return None
    # A record without a "complete" flag is treated as not yet complete.
    if show_incomplete_only and video.get("complete", False):
        return None
    # Tolerate a record with no "url" the same way as a missing record,
    # instead of raising KeyError into the caller.
    return video.get("url")
47
 
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: CaptionEditor
3
  emoji: 🌖
4
  colorFrom: blue
5
  colorTo: red
 
1
  ---
2
+ title: USL-Editor
3
  emoji: 🌖
4
  colorFrom: blue
5
  colorTo: red
Resources/localization.py CHANGED
@@ -21,6 +21,7 @@ STRINGS = {
21
  "header_start": "Start",
22
  "header_text": "Text",
23
  "header_end": "End",
 
24
 
25
  # Edit form
26
  "edit_caption_title": "Edit Caption Entry",
@@ -58,7 +59,7 @@ STRINGS = {
58
  "error": "Error:",
59
  "all_videos_transcribed": "Save cancelled: All videos transcribed",
60
  "change_video_completion_status_success": "Video completion status successfully changed",
61
- "show_incomplete_only_change": "List of accessible videos successfully changed"
62
  },
63
  "uk": {
64
  # Header and login
@@ -77,6 +78,7 @@ STRINGS = {
77
  "header_start": "Початок",
78
  "header_text": "Текст",
79
  "header_end": "Кінець",
 
80
 
81
  # Edit form
82
  "edit_caption_title": "Редагувати субтитр",
@@ -114,7 +116,7 @@ STRINGS = {
114
  "error": "Помилка:",
115
  "all_videos_transcribed": "Відміна збереження: Усі відео розмічено",
116
  "change_video_completion_status_success": "Статус відео успішно змінено",
117
- "show_incomplete_only_change": "Список доступних відео успішно змінено"
118
  }
119
  }
120
 
 
21
  "header_start": "Start",
22
  "header_text": "Text",
23
  "header_end": "End",
24
+ "header_aligned": "Aligned",
25
 
26
  # Edit form
27
  "edit_caption_title": "Edit Caption Entry",
 
59
  "error": "Error:",
60
  "all_videos_transcribed": "Save cancelled: All videos transcribed",
61
  "change_video_completion_status_success": "Video completion status successfully changed",
62
+ "show_incomplete_only_change": "List of accessible videos successfully changed",
63
  },
64
  "uk": {
65
  # Header and login
 
78
  "header_start": "Початок",
79
  "header_text": "Текст",
80
  "header_end": "Кінець",
81
+ "header_aligned": "Вирівняно",
82
 
83
  # Edit form
84
  "edit_caption_title": "Редагувати субтитр",
 
116
  "error": "Помилка:",
117
  "all_videos_transcribed": "Відміна збереження: Усі відео розмічено",
118
  "change_video_completion_status_success": "Статус відео успішно змінено",
119
+ "show_incomplete_only_change": "Список доступних відео успішно змінено",
120
  }
121
  }
122
 
app.py CHANGED
@@ -10,7 +10,7 @@ from Resources.localization import get_string
10
  next_video_pointer = 0
11
  user = "anonymous_user"
12
  n_videos = get_number_of_videos()
13
- placeholder_link = "https://www.youtube.com/watch?v=wTQjwG2-ePA"
14
 
15
 
16
  def get_username(profile: gr.OAuthProfile):
@@ -38,8 +38,16 @@ def on_row_select(df, evt: gr.SelectData):
38
  return gr.update(value=0.0), gr.update(value=""), gr.update(value=0.0), -1, get_string("save_entry_button")
39
 
40
 
 
41
  def save_entry(df, start_time, text, end_time, selected_row_idx, video_id):
42
- """Save or update a caption entry"""
 
 
 
 
 
 
 
43
  if user == "anonymous_user":
44
  return df, gr.Warning(get_string("please_sign_in"))
45
  if next_video_pointer == -1:
@@ -60,19 +68,24 @@ def save_entry(df, start_time, text, end_time, selected_row_idx, video_id):
60
  new_row = pd.DataFrame({
61
  'Start': [start_time],
62
  'Text': [text.strip()],
63
- 'End': [end_time]
 
64
  })
65
  df_copy = pd.concat([df_copy, new_row], ignore_index=True)
66
- # Sort by start time
67
  df_copy = df_copy.sort_values('Start').reset_index(drop=True)
68
  else: # Updating existing entry
69
  if 0 <= selected_row_idx < len(df_copy):
70
- df_copy.iloc[selected_row_idx] = [start_time, text.strip(), end_time]
71
- # Sort by start time
 
 
72
  df_copy = df_copy.sort_values('Start').reset_index(drop=True)
73
 
74
- # Update in database
75
- save_result = save_captions_to_db(df_copy, video_id, user)
 
 
 
76
 
77
  return (
78
  df_copy,
@@ -131,13 +144,24 @@ def change_completion_status(completion_status):
131
  return gr.Error(f"{get_string('error')} {str(e)}")
132
 
133
 
 
134
  def get_next_components(show_incomplete_only):
 
 
 
 
 
 
 
 
 
135
  global next_video_pointer
 
136
  if next_video_pointer != -1:
137
  next_video_link = get_video_link_by_pointer(next_video_pointer, show_incomplete_only)
138
  next_video_pointer = (next_video_pointer + 1) % n_videos
139
 
140
- for i in range(n_videos + 1):
141
  if next_video_link is not None:
142
  break
143
  next_video_link = get_video_link_by_pointer(next_video_pointer, show_incomplete_only)
@@ -148,10 +172,10 @@ def get_next_components(show_incomplete_only):
148
 
149
  try:
150
  next_video_id = youtube_link_to_id(next_video_link)
151
- next_captions = request_captions_by_video_id(next_video_id)
152
- return next_captions, next_video_id
153
- except (ValueError, Exception) as e:
154
- empty_captions = pd.DataFrame(columns=["Start", "Text", "End"])
155
  return empty_captions, "error"
156
 
157
 
@@ -226,10 +250,10 @@ with gr.Blocks(css=css, head=yt_init_js, fill_width=True) as main_page:
226
  caption_editor = gr.DataFrame(
227
  interactive=False,
228
  elem_id="tbl",
229
- datatype=["number", "str", "number"],
230
- col_count=(3, "fixed"),
231
- column_widths=["15%", "70%", "15%"],
232
- headers=[get_string("header_start"), get_string("header_text"), get_string("header_end")],
233
  wrap=True
234
  )
235
 
@@ -255,7 +279,7 @@ with gr.Blocks(css=css, head=yt_init_js, fill_width=True) as main_page:
255
 
256
  next_video_button.click(
257
  fn=get_next_components,
258
- inputs=show_incomplete_only_checkbox,
259
  outputs=[caption_editor, current_video_id]
260
  )
261
  next_video_button.click(
 
10
  next_video_pointer = 0
11
  user = "anonymous_user"
12
  n_videos = get_number_of_videos()
13
+ placeholder_link = "https://www.youtube.com/watch?v=d37lwXaSjs4"
14
 
15
 
16
  def get_username(profile: gr.OAuthProfile):
 
38
  return gr.update(value=0.0), gr.update(value=""), gr.update(value=0.0), -1, get_string("save_entry_button")
39
 
40
 
41
+ # ADC-IMPLEMENTS: <gc-feature-aligned-ui-01>
42
  def save_entry(df, start_time, text, end_time, selected_row_idx, video_id):
43
+ """Save or update a caption entry with per-entry alignment tracking.
44
+
45
+ Works directly with the 4-column DataFrame (Start, Text, End, Aligned).
46
+ Sets aligned=True ONLY for the specific row being added or updated.
47
+ All other rows retain their existing Aligned values from the DataFrame.
48
+ The current video pointer is computed from global state and passed to
49
+ save_captions_to_db for auto-assignment.
50
+ """
51
  if user == "anonymous_user":
52
  return df, gr.Warning(get_string("please_sign_in"))
53
  if next_video_pointer == -1:
 
68
  new_row = pd.DataFrame({
69
  'Start': [start_time],
70
  'Text': [text.strip()],
71
+ 'End': [end_time],
72
+ 'Aligned': [True] # New entry gets aligned=True
73
  })
74
  df_copy = pd.concat([df_copy, new_row], ignore_index=True)
 
75
  df_copy = df_copy.sort_values('Start').reset_index(drop=True)
76
  else: # Updating existing entry
77
  if 0 <= selected_row_idx < len(df_copy):
78
+ df_copy.at[selected_row_idx, 'Start'] = start_time
79
+ df_copy.at[selected_row_idx, 'Text'] = text.strip()
80
+ df_copy.at[selected_row_idx, 'End'] = end_time
81
+ df_copy.at[selected_row_idx, 'Aligned'] = True # Only this row gets aligned=True
82
  df_copy = df_copy.sort_values('Start').reset_index(drop=True)
83
 
84
+ # Compute the current video's pointer index from global state.
85
+ # This is the same formula used by change_completion_status.
86
+ current_pointer = (next_video_pointer + n_videos - 1) % n_videos
87
+ # save_captions_to_db receives the 4-col DF directly
88
+ save_result = save_captions_to_db(df_copy, video_id, user, current_pointer)
89
 
90
  return (
91
  df_copy,
 
144
  return gr.Error(f"{get_string('error')} {str(e)}")
145
 
146
 
147
+ # ADC-IMPLEMENTS: <gc-feature-assignment-01>
148
  def get_next_components(show_incomplete_only):
149
+ """Get the next video and its captions as a 4-column DataFrame.
150
+
151
+ Returns:
152
+ captions: 4-column DataFrame [Start, Text, End, Aligned] for the UI
153
+ next_video_id: YouTube video ID string
154
+
155
+ Args:
156
+ show_incomplete_only: If True, skip videos marked as complete
157
+ """
158
  global next_video_pointer
159
+ next_video_link = placeholder_link
160
  if next_video_pointer != -1:
161
  next_video_link = get_video_link_by_pointer(next_video_pointer, show_incomplete_only)
162
  next_video_pointer = (next_video_pointer + 1) % n_videos
163
 
164
+ for _ in range(n_videos + 1):
165
  if next_video_link is not None:
166
  break
167
  next_video_link = get_video_link_by_pointer(next_video_pointer, show_incomplete_only)
 
172
 
173
  try:
174
  next_video_id = youtube_link_to_id(next_video_link)
175
+ captions = request_captions_by_video_id(next_video_id)
176
+ return captions, next_video_id
177
+ except (ValueError, Exception):
178
+ empty_captions = pd.DataFrame(columns=["Start", "Text", "End", "Aligned"])
179
  return empty_captions, "error"
180
 
181
 
 
250
  caption_editor = gr.DataFrame(
251
  interactive=False,
252
  elem_id="tbl",
253
+ datatype=["number", "str", "number", "bool"],
254
+ col_count=(4, "fixed"),
255
+ column_widths=["12%", "60%", "12%", "16%"],
256
+ headers=[get_string("header_start"), get_string("header_text"), get_string("header_end"), get_string("header_aligned")],
257
  wrap=True
258
  )
259
 
 
279
 
280
  next_video_button.click(
281
  fn=get_next_components,
282
+ inputs=[show_incomplete_only_checkbox],
283
  outputs=[caption_editor, current_video_id]
284
  )
285
  next_video_button.click(