Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -42,39 +42,34 @@ def process_video(Video, target_language):
|
|
| 42 |
run(["ffmpeg", "-i", Video, audio_file])
|
| 43 |
transcript_file = f"{common_uuid}.srt"
|
| 44 |
# Transcription with Whisper.
|
| 45 |
-
print("
|
| 46 |
-
segments, _ = whisper_model.transcribe(audio_file,
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
for segment in segments:
|
| 50 |
-
for word in segment.words:
|
| 51 |
-
start_time = f"{word.start:.2f}"
|
| 52 |
-
end_time = f"{word.end:.2f}"
|
| 53 |
-
transcript_with_timestamps.append(f"[{start_time}s -> {end_time}s] {word.word}")
|
| 54 |
|
| 55 |
# Create a list to hold the translated lines.
|
| 56 |
translated_lines = []
|
| 57 |
|
| 58 |
with open(transcript_file, "w+", encoding="utf-8") as f:
|
| 59 |
counter = 1
|
| 60 |
-
for
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
# Move the file pointer to the beginning of the file.
|
| 79 |
f.seek(0)
|
| 80 |
|
|
|
|
| 42 |
run(["ffmpeg", "-i", Video, audio_file])
|
| 43 |
transcript_file = f"{common_uuid}.srt"
|
| 44 |
# Transcription with Whisper.
|
| 45 |
+
print("Iniciando transcrição com Whisper")
|
| 46 |
+
segments, _ = whisper_model.transcribe(audio_file, beam_size=5)
|
| 47 |
+
segments = list(segments)
|
| 48 |
+
transcript_file = f"{current_path}/{common_uuid}.srt"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
|
| 50 |
# Create a list to hold the translated lines.
|
| 51 |
translated_lines = []
|
| 52 |
|
| 53 |
with open(transcript_file, "w+", encoding="utf-8") as f:
|
| 54 |
counter = 1
|
| 55 |
+
for segment in segments:
|
| 56 |
+
start_hours = int(segment.start // 3600)
|
| 57 |
+
start_minutes = int((segment.start % 3600) // 60)
|
| 58 |
+
start_seconds = int(segment.start % 60)
|
| 59 |
+
start_milliseconds = int((segment.start - int(segment.start)) * 1000)
|
| 60 |
+
|
| 61 |
+
end_hours = int(segment.end // 3600)
|
| 62 |
+
end_minutes = int((segment.end % 3600) // 60)
|
| 63 |
+
end_seconds = int(segment.end % 60)
|
| 64 |
+
end_milliseconds = int((segment.end - int(segment.end)) * 1000)
|
| 65 |
+
|
| 66 |
+
formatted_start = f"{start_hours:02d}:{start_minutes:02d}:{start_seconds:02d},{start_milliseconds:03d}"
|
| 67 |
+
formatted_end = f"{end_hours:02d}:{end_minutes:02d}:{end_seconds:02d},{end_milliseconds:03d}"
|
| 68 |
+
|
| 69 |
+
f.write(f"{counter}\n")
|
| 70 |
+
f.write(f"{formatted_start} --> {formatted_end}\n")
|
| 71 |
+
f.write(f"{segment.text}\n\n")
|
| 72 |
+
counter += 1
|
| 73 |
# Move the file pointer to the beginning of the file.
|
| 74 |
f.seek(0)
|
| 75 |
|