Spaces:
Runtime error
Runtime error
Matthijs Hollemans
committed on
Commit
·
1aa521f
1
Parent(s):
f539e6d
draw words one-by-one
Browse files
app.py
CHANGED
|
@@ -21,16 +21,18 @@ font = ImageFont.truetype("Lato-Regular.ttf", 40)
|
|
| 21 |
text_color = (255, 200, 200)
|
| 22 |
highlight_color = (255, 255, 255)
|
| 23 |
|
| 24 |
-
|
| 25 |
# checkpoint = "openai/whisper-base"
|
| 26 |
-
checkpoint = "openai/whisper-small"
|
| 27 |
pipe = pipeline(model=checkpoint)
|
| 28 |
|
| 29 |
# TODO: no longer need to set these manually once the models have been updated on the Hub
|
|
|
|
|
|
|
| 30 |
# whisper-base
|
| 31 |
-
# pipe.model.
|
| 32 |
# whisper-small
|
| 33 |
-
pipe.model.
|
| 34 |
|
| 35 |
chunks = []
|
| 36 |
|
|
@@ -53,7 +55,6 @@ def make_frame(t):
|
|
| 53 |
# for debugging: draw frame time
|
| 54 |
#draw.text((20, 20), str(t), fill=text_color, font=font)
|
| 55 |
|
| 56 |
-
space_length = draw.textlength(" ", font)
|
| 57 |
x = margin_left
|
| 58 |
y = margin_top
|
| 59 |
|
|
@@ -63,24 +64,13 @@ def make_frame(t):
|
|
| 63 |
if chunk_end is None: chunk_end = max_duration
|
| 64 |
|
| 65 |
if chunk_start <= t <= chunk_end:
|
| 66 |
-
|
| 67 |
-
|
| 68 |
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
if x + word_length >= video_width - margin_right:
|
| 72 |
-
x = margin_left
|
| 73 |
-
y += line_height
|
| 74 |
-
|
| 75 |
-
if times[0] <= t <= times[1]:
|
| 76 |
-
color = highlight_color
|
| 77 |
-
draw.rectangle([x, y + line_height, x + word_length, y + line_height + 4], fill=color)
|
| 78 |
-
else:
|
| 79 |
-
color = text_color
|
| 80 |
-
|
| 81 |
-
draw.text((x, y), word, fill=color, font=font)
|
| 82 |
-
x += word_length + space_length
|
| 83 |
|
|
|
|
| 84 |
break
|
| 85 |
|
| 86 |
return np.array(image)
|
|
|
|
| 21 |
text_color = (255, 200, 200)
|
| 22 |
highlight_color = (255, 255, 255)
|
| 23 |
|
| 24 |
+
checkpoint = "openai/whisper-tiny"
|
| 25 |
# checkpoint = "openai/whisper-base"
|
| 26 |
+
# checkpoint = "openai/whisper-small"
|
| 27 |
pipe = pipeline(model=checkpoint)
|
| 28 |
|
| 29 |
# TODO: no longer need to set these manually once the models have been updated on the Hub
|
| 30 |
+
# whisper-tiny
|
| 31 |
+
pipe.model.generation_config.alignment_heads = [[2, 2], [3, 0], [3, 2], [3, 3], [3, 4], [3, 5]]
|
| 32 |
# whisper-base
|
| 33 |
+
# pipe.model.generation_config.alignment_heads = [[3, 1], [4, 2], [4, 3], [4, 7], [5, 1], [5, 2], [5, 4], [5, 6]]
|
| 34 |
# whisper-small
|
| 35 |
+
# pipe.model.generation_config.alignment_heads = [[5, 3], [5, 9], [8, 0], [8, 4], [8, 7], [8, 8], [9, 0], [9, 7], [9, 9], [10, 5]]
|
| 36 |
|
| 37 |
chunks = []
|
| 38 |
|
|
|
|
| 55 |
# for debugging: draw frame time
|
| 56 |
#draw.text((20, 20), str(t), fill=text_color, font=font)
|
| 57 |
|
|
|
|
| 58 |
x = margin_left
|
| 59 |
y = margin_top
|
| 60 |
|
|
|
|
| 64 |
if chunk_end is None: chunk_end = max_duration
|
| 65 |
|
| 66 |
if chunk_start <= t <= chunk_end:
|
| 67 |
+
word = chunk["text"]
|
| 68 |
+
word_length = draw.textlength(word, font)
|
| 69 |
|
| 70 |
+
x = (video_width - word_length) / 2
|
| 71 |
+
y = video_height / 2 - 20
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
|
| 73 |
+
draw.text((x, y), word, fill=highlight_color, font=font)
|
| 74 |
break
|
| 75 |
|
| 76 |
return np.array(image)
|