"""Video -> text -> summary pipeline.

Extracts the audio track from a video, transcribes it with OpenAI Whisper,
optionally computes the Word Error Rate (WER) between two transcriptions,
and summarizes the transcript with a BART summarization model.
"""

import os

import whisper
from moviepy.editor import VideoFileClip
from pydub import AudioSegment
from jiwer import wer
from faster_whisper import WhisperModel  # NOTE(review): imported but unused here; kept for compatibility
from transformers import pipeline


def extract_audio(video_path, audio_path="extracted_audio.wav"):
    """Extract the audio track of *video_path* into a WAV file.

    Args:
        video_path: Path to the input video file.
        audio_path: Destination path for the extracted audio.

    Returns:
        The path of the written audio file.
    """
    video = VideoFileClip(video_path)
    try:
        video.audio.write_audiofile(audio_path)
    finally:
        # Release the underlying ffmpeg reader/file handles (was leaked before).
        video.close()
    return audio_path


def transcribe_audio(audio_path, model_size="large"):
    """Transcribe an audio file to text using Whisper.

    Args:
        audio_path: Path to the audio file.
        model_size: Whisper model size (e.g. "tiny", "base", "large").

    Returns:
        The transcribed text.
    """
    model = whisper.load_model(model_size)
    result = model.transcribe(audio_path)
    return result["text"]


def video_to_text(video_path, model_size="large"):
    """Extract audio from a video and transcribe it to text.

    The intermediate audio file is deleted after transcription.
    """
    print("Extracting audio from the video...")
    audio_path = extract_audio(video_path)
    print("Converting audio to text...🔊")
    text = transcribe_audio(audio_path, model_size)
    os.remove(audio_path)
    print("Text extraction completed!")
    return text


def calculate_wer(ground_truth_video_path, predicted_video_path, model_size="large"):
    """Calculate Word Error Rate (WER) between the transcripts of two videos.

    Args:
        ground_truth_video_path: Video whose transcript is the reference.
        predicted_video_path: Video whose transcript is the hypothesis.
        model_size: Whisper model size used for both transcriptions.

    Returns:
        The WER as a float (0.0 means identical transcripts).
    """
    # Transcribe both videos (reference and hypothesis).
    ground_truth_text = video_to_text(ground_truth_video_path, model_size)
    predicted_text = video_to_text(predicted_video_path, model_size)

    # jiwer expects (reference, hypothesis) in this order.
    error_rate = wer(ground_truth_text, predicted_text)
    print(f"Word Error Rate (WER): {error_rate:.2f}")
    return error_rate


def chunk_text(text, max_words=500):
    """Split long text into chunks of at most *max_words* words."""
    words = text.split()
    chunks = [" ".join(words[i:i + max_words]) for i in range(0, len(words), max_words)]
    return chunks


def summarize_text(text):
    """Summarize text of arbitrary length.

    Long input is chunked, each chunk is summarized independently, and if
    more than one chunk was produced a final pass condenses the joined
    per-chunk summaries into one concise summary.
    """
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
    text_chunks = chunk_text(text)
    summaries = []
    for chunk in text_chunks:
        summary = summarizer(chunk, max_length=180, min_length=60, do_sample=False)
        summaries.append(summary[0]['summary_text'])
    full_summary = " ".join(summaries)
    # Optional: generate a final concise summary over the chunk summaries.
    if len(text_chunks) > 1:
        final_summary = summarizer(full_summary, max_length=120, min_length=40, do_sample=False)
        return final_summary[0]['summary_text']
    return full_summary


# BUG FIX: original read `if name == "main":`, which raises NameError and never
# runs. The example WER computation also executed unconditionally at import
# time; it is now guarded here so importing this module has no side effects.
if __name__ == "__main__":
    # Example WER usage (comparing a video's transcript against itself).
    ground_truth_video_path = "/content/need_job.mp4"
    predicted_video_path = "/content/need_job.mp4"
    calculate_wer(ground_truth_video_path, predicted_video_path, model_size="large")

    video_path = "/content/need_job.mp4"
    extracted_text = video_to_text(video_path, model_size="large")
    print("\n Extracted text:\n", extracted_text)
    print("\n Generating Summary...")
    summary = summarize_text(extracted_text)
    print("\nSummary:\n", summary)