# NOTE(review): the lines "Spaces: / No application file" were Hugging Face
# Spaces page residue captured during extraction — not part of the program.
import os

import whisper
from faster_whisper import WhisperModel
from jiwer import wer
from moviepy.editor import VideoFileClip
from pydub import AudioSegment
from transformers import pipeline
def extract_audio(video_path, audio_path="extracted_audio.wav"):
    """Extract the audio track from a video and write it to a WAV file.

    Args:
        video_path: Path to the input video file.
        audio_path: Destination path for the extracted audio
            (default: "extracted_audio.wav").

    Returns:
        The path of the written audio file (same as ``audio_path``).
    """
    video = VideoFileClip(video_path)
    try:
        video.audio.write_audiofile(audio_path)
    finally:
        # Bug fix: the clip was never closed, leaking the underlying
        # ffmpeg reader process / file handles on every call.
        video.close()
    return audio_path
def transcribe_audio(audio_path, model_size="large"):
    """Transcribe an audio file to text with OpenAI Whisper.

    Args:
        audio_path: Path to the audio file to transcribe.
        model_size: Whisper checkpoint name to load (default: "large").

    Returns:
        The transcribed text.
    """
    whisper_model = whisper.load_model(model_size)
    transcription = whisper_model.transcribe(audio_path)
    return transcription["text"]
def video_to_text(video_path, model_size="large"):
    """Transcribe the speech in a video file to text.

    Extracts the audio track to a temporary WAV file, transcribes it with
    Whisper, and deletes the temporary file afterwards.

    Args:
        video_path: Path to the input video file.
        model_size: Whisper checkpoint name to use (default: "large").

    Returns:
        The transcribed text.
    """
    print("Extracting audio from the video...")
    audio_path = extract_audio(video_path)
    try:
        # Fixed a mojibake character ("๐") that had corrupted this message.
        print("Converting audio to text...")
        text = transcribe_audio(audio_path, model_size)
    finally:
        # Bug fix: remove the temporary audio file even when transcription
        # raises; previously a failure left extracted_audio.wav behind.
        os.remove(audio_path)
    print("Text extraction completed!")
    return text
# Compare two videos' transcripts via Word Error Rate.
def calculate_wer(ground_truth_video_path, predicted_video_path, model_size="large"):
    """Compute the Word Error Rate (WER) between two videos' transcripts.

    Both videos are transcribed with the same Whisper model; the first
    transcript is treated as the reference and the second as the hypothesis.

    Args:
        ground_truth_video_path: Video whose transcript is the reference.
        predicted_video_path: Video whose transcript is evaluated.
        model_size: Whisper checkpoint name to use (default: "large").

    Returns:
        The WER as a float (0.0 means a perfect match).
    """
    reference_text = video_to_text(ground_truth_video_path, model_size)
    hypothesis_text = video_to_text(predicted_video_path, model_size)
    error_rate = wer(reference_text, hypothesis_text)
    print(f"Word Error Rate (WER): {error_rate:.2f}")
    return error_rate
# Example usage — runs at import time; both paths point at the same clip,
# so the reported WER should be ~0 (a sanity check of the pipeline).
ground_truth_video_path = "/content/need_job.mp4"
predicted_video_path = "/content/need_job.mp4"
calculate_wer(ground_truth_video_path, predicted_video_path, model_size="large")
def chunk_text(text, max_words=500):
    """Split *text* into chunks of at most *max_words* words each.

    Args:
        text: The input string; split on whitespace.
        max_words: Maximum number of words per chunk (default: 500).

    Returns:
        A list of chunk strings; empty list for empty/whitespace-only input.
    """
    words = text.split()
    pieces = []
    for start in range(0, len(words), max_words):
        pieces.append(" ".join(words[start:start + max_words]))
    return pieces
def summarize_text(text):
    """Summarize text of arbitrary length with facebook/bart-large-cnn.

    The input is split into 500-word chunks (the model has a limited input
    window), each chunk is summarized independently, and — when there was
    more than one chunk — the stitched chunk summaries are condensed once
    more into a single short summary.

    Args:
        text: The text to summarize.

    Returns:
        The summary string.
    """
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
    partial_summaries = [
        summarizer(chunk, max_length=180, min_length=60, do_sample=False)[0]["summary_text"]
        for chunk in chunk_text(text)
    ]
    stitched = " ".join(partial_summaries)
    if len(partial_summaries) > 1:
        # Second pass: condense the concatenated chunk summaries.
        final = summarizer(stitched, max_length=120, min_length=40, do_sample=False)
        return final[0]["summary_text"]
    return stitched
# Bug fix: the guard was `if name == "main":`, which raises NameError —
# the dunder variables are __name__ / "__main__".
if __name__ == "__main__":
    video_path = "/content/need_job.mp4"
    extracted_text = video_to_text(video_path, model_size="large")
    print("\n Extracted text:\n", extracted_text)
    print("\n Generating Summary...")
    summary = summarize_text(extracted_text)
    print("\nSummary:\n", summary)