# Provenance (Hugging Face page metadata, kept as a comment so the file parses):
# uploaded by "ayloll" — "Upload 2 files", commit 1057143, verified.
import whisper
import os
from moviepy.editor import VideoFileClip
from pydub import AudioSegment
from jiwer import wer
from faster_whisper import WhisperModel
from transformers import pipeline
def extract_audio(video_path, audio_path="extracted_audio.wav"):
    """Extract the audio track from a video file and write it to disk.

    Args:
        video_path: Path to the input video file.
        audio_path: Destination path for the extracted audio
            (default: "extracted_audio.wav" in the current directory).

    Returns:
        The path of the written audio file (same value as ``audio_path``).
    """
    video = VideoFileClip(video_path)
    try:
        video.audio.write_audiofile(audio_path)
    finally:
        # Release the ffmpeg reader/file handles; the original leaked them.
        video.close()
    return audio_path
def transcribe_audio(audio_path, model_size="large"):
    """Transcribe an audio file to text using OpenAI Whisper.

    Args:
        audio_path: Path to the audio file to transcribe.
        model_size: Whisper checkpoint size to load (e.g. "tiny", "base",
            "small", "medium", "large"). Larger models are slower but
            more accurate.

    Returns:
        The transcription as a single string.
    """
    # NOTE(review): loads the model on every call — fine for a one-shot
    # script, but cache the model if this is called repeatedly.
    model = whisper.load_model(model_size)
    result = model.transcribe(audio_path)
    return result["text"]
def video_to_text(video_path, model_size="large"):
    """Extract the audio from a video and transcribe it to text.

    Args:
        video_path: Path to the input video file.
        model_size: Whisper model size passed through to
            ``transcribe_audio``.

    Returns:
        The transcribed text of the video's audio track.
    """
    print("Extracting audio from the video...")
    audio_path = extract_audio(video_path)
    # Fixed mojibake: the original emitted "๐Ÿ”Š" (UTF-8 🔊 decoded as cp874).
    print("Converting audio to text...🔊")
    try:
        text = transcribe_audio(audio_path, model_size)
    finally:
        # Clean up the temporary audio file even if transcription fails;
        # the original left it behind on error.
        os.remove(audio_path)
    print("Text extraction completed!")
    return text
def calculate_wer(ground_truth_video_path, predicted_video_path, model_size="large"):
    """Compute the Word Error Rate (WER) between two videos' transcripts.

    Both videos are transcribed with the same Whisper model size and the
    resulting texts are compared with ``jiwer.wer``.

    Args:
        ground_truth_video_path: Video whose transcript is the reference.
        predicted_video_path: Video whose transcript is the hypothesis.
        model_size: Whisper model size used for both transcriptions.

    Returns:
        The WER as a float (0.0 means identical transcripts).
    """
    # Transcribe both videos (reference first, then hypothesis).
    ground_truth_text = video_to_text(ground_truth_video_path, model_size)
    predicted_text = video_to_text(predicted_video_path, model_size)
    error_rate = wer(ground_truth_text, predicted_text)
    print(f"Word Error Rate (WER): {error_rate:.2f}")
    return error_rate
# Example usage — guarded so that importing this module does not kick off a
# full (slow, model-downloading) transcription run; the original executed
# this unconditionally at import time.
if __name__ == "__main__":
    ground_truth_video_path = "/content/need_job.mp4"
    # NOTE(review): ground truth and prediction are the same file here, so
    # the expected WER is 0.0 — this only smoke-tests the pipeline.
    predicted_video_path = "/content/need_job.mp4"
    calculate_wer(ground_truth_video_path, predicted_video_path, model_size="large")
def chunk_text(text, max_words=500):
    """Split text into chunks of at most ``max_words`` whitespace-separated words.

    Args:
        text: The input text to split.
        max_words: Maximum number of words per chunk (default 500, sized
            to fit comfortably inside the summarizer's input window).

    Returns:
        A list of space-joined chunks; an empty or whitespace-only input
        yields an empty list.
    """
    words = text.split()
    return [
        " ".join(words[start:start + max_words])
        for start in range(0, len(words), max_words)
    ]
def summarize_text(text):
    """Summarize arbitrarily long text with the BART-large-CNN model.

    The text is split into word-count chunks (the model's input window is
    limited), each chunk is summarized independently, and — when more than
    one chunk was needed — the concatenated chunk summaries are compressed
    by one final summarization pass for coherence.

    Args:
        text: The text to summarize.

    Returns:
        The summary string; an empty input returns an empty string.
    """
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
    text_chunks = chunk_text(text)
    # Explicit empty-input guard (original implicitly returned "" here too).
    if not text_chunks:
        return ""
    summaries = [
        summarizer(chunk, max_length=180, min_length=60, do_sample=False)[0]['summary_text']
        for chunk in text_chunks
    ]
    full_summary = " ".join(summaries)
    if len(text_chunks) > 1:
        # Second pass: condense the stitched chunk summaries into one.
        final_summary = summarizer(full_summary, max_length=120, min_length=40, do_sample=False)
        return final_summary[0]['summary_text']
    return full_summary
# Fixed entry-point guard: the original read `if name == "main":` (the
# dunder underscores were stripped during extraction), which raises
# NameError at runtime.
if __name__ == "__main__":
    video_path = "/content/need_job.mp4"
    extracted_text = video_to_text(video_path, model_size="large")
    print("\n Extracted text:\n", extracted_text)
    print("\n Generating Summary...")
    summary = summarize_text(extracted_text)
    print("\nSummary:\n", summary)