# Provenance (Hugging Face page metadata, kept as a comment so the file parses):
# uploaded by "ayloll" — "Upload 2 files", commit 1057143, verified.
import whisper
import os
from moviepy.editor import VideoFileClip
from pydub import AudioSegment
from jiwer import wer
from faster_whisper import WhisperModel
from transformers import pipeline
def extract_audio(video_path, audio_path="extracted_audio.wav"):
    """Extract the audio track from a video file and write it to disk.

    Args:
        video_path: Path to the input video file.
        audio_path: Destination path for the extracted audio
            (default: "extracted_audio.wav" in the current directory).

    Returns:
        The path of the written audio file (same value as ``audio_path``).
    """
    video = VideoFileClip(video_path)
    try:
        video.audio.write_audiofile(audio_path)
    finally:
        # Release the ffmpeg reader/file handles; the original leaked them.
        video.close()
    return audio_path
def transcribe_audio(audio_path, model_size="large"):
    """Transcribe an audio file to text using OpenAI Whisper.

    Args:
        audio_path: Path to the audio file to transcribe.
        model_size: Whisper checkpoint size to load (e.g. "tiny", "base",
            "small", "medium", "large"). Larger models are slower but
            more accurate.

    Returns:
        The transcription as a single string.
    """
    # NOTE(review): loads the model on every call — fine for a one-shot
    # script, but cache the model if this is called repeatedly.
    model = whisper.load_model(model_size)
    result = model.transcribe(audio_path)
    return result["text"]
def video_to_text(video_path, model_size="large"):
    """Extract the audio from a video and transcribe it to text.

    Args:
        video_path: Path to the input video file.
        model_size: Whisper model size passed through to
            ``transcribe_audio``.

    Returns:
        The transcribed text of the video's audio track.
    """
    print("Extracting audio from the video...")
    audio_path = extract_audio(video_path)
    # Fixed mojibake: the original emitted "๐Ÿ”Š" (UTF-8 🔊 decoded as cp874).
    print("Converting audio to text...🔊")
    try:
        text = transcribe_audio(audio_path, model_size)
    finally:
        # Clean up the temporary audio file even if transcription fails;
        # the original left it behind on error.
        os.remove(audio_path)
    print("Text extraction completed!")
    return text
def calculate_wer(ground_truth_video_path, predicted_video_path, model_size="large"):
    """Compute the Word Error Rate (WER) between two videos' transcripts.

    Both videos are transcribed with the same Whisper model size and the
    resulting texts are compared with ``jiwer.wer``.

    Args:
        ground_truth_video_path: Video whose transcript is the reference.
        predicted_video_path: Video whose transcript is the hypothesis.
        model_size: Whisper model size used for both transcriptions.

    Returns:
        The WER as a float (0.0 means identical transcripts).
    """
    # Transcribe both videos (reference first, then hypothesis).
    ground_truth_text = video_to_text(ground_truth_video_path, model_size)
    predicted_text = video_to_text(predicted_video_path, model_size)
    error_rate = wer(ground_truth_text, predicted_text)
    print(f"Word Error Rate (WER): {error_rate:.2f}")
    return error_rate
# Example usage — guarded so that importing this module does not kick off a
# full (slow, model-downloading) transcription run; the original executed
# this unconditionally at import time.
if __name__ == "__main__":
    ground_truth_video_path = "/content/need_job.mp4"
    # NOTE(review): ground truth and prediction are the same file here, so
    # the expected WER is 0.0 — this only smoke-tests the pipeline.
    predicted_video_path = "/content/need_job.mp4"
    calculate_wer(ground_truth_video_path, predicted_video_path, model_size="large")
def chunk_text(text, max_words=500):
    """Split text into chunks of at most ``max_words`` whitespace-separated words.

    Args:
        text: The input text to split.
        max_words: Maximum number of words per chunk (default 500, sized
            to fit comfortably inside the summarizer's input window).

    Returns:
        A list of space-joined chunks; an empty or whitespace-only input
        yields an empty list.
    """
    words = text.split()
    return [
        " ".join(words[start:start + max_words])
        for start in range(0, len(words), max_words)
    ]
def summarize_text(text):
    """Summarize arbitrarily long text with the BART-large-CNN model.

    The text is split into word-count chunks (the model's input window is
    limited), each chunk is summarized independently, and — when more than
    one chunk was needed — the concatenated chunk summaries are compressed
    by one final summarization pass for coherence.

    Args:
        text: The text to summarize.

    Returns:
        The summary string; an empty input returns an empty string.
    """
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
    text_chunks = chunk_text(text)
    # Explicit empty-input guard (original implicitly returned "" here too).
    if not text_chunks:
        return ""
    summaries = [
        summarizer(chunk, max_length=180, min_length=60, do_sample=False)[0]['summary_text']
        for chunk in text_chunks
    ]
    full_summary = " ".join(summaries)
    if len(text_chunks) > 1:
        # Second pass: condense the stitched chunk summaries into one.
        final_summary = summarizer(full_summary, max_length=120, min_length=40, do_sample=False)
        return final_summary[0]['summary_text']
    return full_summary
# Fixed entry-point guard: the original read `if name == "main":` (the
# dunder underscores were stripped during extraction), which raises
# NameError at runtime.
if __name__ == "__main__":
    video_path = "/content/need_job.mp4"
    extracted_text = video_to_text(video_path, model_size="large")
    print("\n Extracted text:\n", extracted_text)
    print("\n Generating Summary...")
    summary = summarize_text(extracted_text)
    print("\nSummary:\n", summary)