Update app.py
Browse files
app.py
CHANGED
|
@@ -72,7 +72,17 @@ def transcribe_audio(audio_file):
|
|
| 72 |
print(f"Audio duration: {len(audio) / 1000:.2f} seconds")
|
| 73 |
print("Starting transcription...")
|
| 74 |
input_features = whisper_processor(audio_array, sampling_rate=16000, return_tensors="pt").input_features.to(device)
|
| 75 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
transcription = whisper_processor.batch_decode(predicted_ids, skip_special_tokens=True)
|
| 77 |
|
| 78 |
print(f"Transcription complete. Length: {len(transcription[0])} characters")
|
|
|
|
| 72 |
print(f"Audio duration: {len(audio) / 1000:.2f} seconds")
|
| 73 |
print("Starting transcription...")
|
| 74 |
input_features = whisper_processor(audio_array, sampling_rate=16000, return_tensors="pt").input_features.to(device)
|
| 75 |
+
|
| 76 |
+
# Create attention mask
|
| 77 |
+
attention_mask = torch.ones_like(input_features)
|
| 78 |
+
|
| 79 |
+
# Generate with specific parameters
|
| 80 |
+
predicted_ids = whisper_model.generate(
|
| 81 |
+
input_features,
|
| 82 |
+
attention_mask=attention_mask,
|
| 83 |
+
language='en',
|
| 84 |
+
task='translate'
|
| 85 |
+
)
|
| 86 |
transcription = whisper_processor.batch_decode(predicted_ids, skip_special_tokens=True)
|
| 87 |
|
| 88 |
print(f"Transcription complete. Length: {len(transcription[0])} characters")
|