import os

import whisper
from moviepy.editor import VideoFileClip
from jiwer import wer
from transformers import pipeline

def extract_audio(video_path, audio_path="extracted_audio.wav"):
    """Extract the audio track from a video and save it as a WAV file."""
    video = VideoFileClip(video_path)
    video.audio.write_audiofile(audio_path)
    video.close()
    return audio_path

def transcribe_audio(audio_path, model_size="large"):
    """Transcribe an audio file to text with OpenAI Whisper."""
    model = whisper.load_model(model_size)
    result = model.transcribe(audio_path)
    return result["text"]

def video_to_text(video_path, model_size="large"):
    """Convert a video to text: extract the audio, transcribe it, then clean up."""
    print("Extracting audio from the video...")
    audio_path = extract_audio(video_path)
    print("Converting audio to text...🔊")
    text = transcribe_audio(audio_path, model_size)
    os.remove(audio_path)  # delete the temporary WAV file
    print("Text extraction completed!")
    return text

def calculate_wer(ground_truth_video_path, predicted_video_path, model_size="large"):
    """Calculate the Word Error Rate (WER) between the transcripts of two videos."""
    # Transcribe both videos (reference and hypothesis)
    ground_truth_text = video_to_text(ground_truth_video_path, model_size)
    predicted_text = video_to_text(predicted_video_path, model_size)
    error_rate = wer(ground_truth_text, predicted_text)
    print(f"Word Error Rate (WER): {error_rate:.2f}")
    return error_rate
# Example usage (both paths point at the same file here, so the WER will be 0.0)
ground_truth_video_path = "/content/need_job.mp4"
predicted_video_path = "/content/need_job.mp4"

calculate_wer(ground_truth_video_path, predicted_video_path, model_size="large")
def chunk_text(text, max_words=500):
    """Split long text into chunks based on word count"""
    words = text.split()
    chunks = [" ".join(words[i:i + max_words]) for i in range(0, len(words), max_words)]
    return chunks

def summarize_text(text):
    """Summarize text of any length by chunking it to fit the model's input limit."""
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
    text_chunks = chunk_text(text)

    summaries = []
    for chunk in text_chunks:
        summary = summarizer(chunk, max_length=180, min_length=60, do_sample=False)
        summaries.append(summary[0]['summary_text'])

    full_summary = " ".join(summaries)

    # If the text was chunked, condense the concatenated chunk summaries
    # into a final, shorter summary.
    if len(text_chunks) > 1:
        final_summary = summarizer(full_summary, max_length=120, min_length=40, do_sample=False)
        return final_summary[0]['summary_text']
    return full_summary

if __name__ == "__main__":
    video_path = "/content/need_job.mp4"
    extracted_text = video_to_text(video_path, model_size="large")
    print("\nExtracted text:\n", extracted_text)
    print("\nGenerating Summary...")
    summary = summarize_text(extracted_text)
    print("\nSummary:\n", summary)