Spaces:

hassanmustafa
/

Project-SentimentTube

Sleeping

App Files Files Community

Project-SentimentTube / app.py

hassanmustafa

Update app.py

2f5b0be verified almost 2 years ago

raw

history blame contribute delete

3.25 kB

	# https://www.youtube.com/watch?v=SYHPQ0rXzWM

	import yt_dlp
	import gradio as gr
	from transformers import BertTokenizer, BertForSequenceClassification
	import torch
	import os
	# Create every working directory used by the app up front; exist_ok makes
	# this a no-op when a directory is already present.
	for _workdir in ("Audios", "logs", "results", "Youtube Videos"):
	    os.makedirs(_workdir, exist_ok=True)

	# Load the BERT sequence classifier and its matching tokenizer.
	# NOTE(review): 'bert-base-uncased' is the stock pretrained checkpoint name,
	# not a fine-tuned one -- confirm whether a sentiment-tuned checkpoint was
	# meant to be loaded here.
	tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
	# eval() hands back the module itself, so loading and switching to
	# inference mode can be chained into one expression.
	model = BertForSequenceClassification.from_pretrained('bert-base-uncased').eval()

	import yt_dlp
	import random

	# Pool of browser User-Agent strings (desktop Safari, desktop Chrome,
	# mobile Chrome) to choose from when making requests.
	user_agents = [
	    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.2 Safari/605.1.15",
	    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36",
	    "Mozilla/5.0 (Linux; Android 10; SM-G981B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Mobile Safari/537.36",
	]
	def download_youtube_video(video_url, output_path='Youtube Videos/Recent.mp4'):
	    """Download the best available audio stream of a video with yt-dlp.

	    Args:
	        video_url: URL of the video to fetch.
	        output_path: File path yt-dlp is told to write the download to.

	    Returns:
	        ``output_path``, unchanged, so it can be handed to the next step.

	    Any exception raised by yt-dlp propagates to the caller.
	    """
	    ydl_opts = {
	        'format': 'bestaudio/best',
	        'outtmpl': output_path,
	        # The output path is fixed, so without this yt-dlp finds the file
	        # left by a previous request, reports it as already downloaded and
	        # the caller ends up processing the old video again.
	        'overwrites': True,
	        'geo_bypass': True,  # Bypass geo-restrictions
	        'prefer_insecure': True,  # Use HTTP instead of HTTPS
	        # Request headers are passed through 'http_headers'; a bare
	        # 'user_agent' key is not a YoutubeDL option and has no effect.
	        'http_headers': {'User-Agent': random.choice(user_agents)},  # Rotate user agents
	        'http_chunk_size': 16 * 1024,  # Set HTTP chunk size to 16KB
	        # Certificate verification is left enabled. The YoutubeDL option that
	        # disables it is spelled 'nocheckcertificate'; turn it on only as a
	        # deliberate decision.
	    }
	    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
	        ydl.download([video_url])

	    return output_path

	from pydub import AudioSegment

	def mp4_to_wav(mp4_path):
	    """Decode the media file at ``mp4_path`` and re-export it as WAV.

	    The output always goes to ``Audios/pydub_output.wav``; that path is
	    returned.
	    """
	    wav_path = "Audios/pydub_output.wav"
	    AudioSegment.from_file(mp4_path).export(wav_path, format="wav")
	    return wav_path

	import speech_recognition as sr

	def transcribe_audio(audio_path):
	    """Return the text recognised in the audio file at ``audio_path``.

	    The file is opened as a ``speech_recognition`` audio source, captured
	    with ``record`` and then passed to ``recognize_google``. Exceptions from
	    the recogniser are not handled here and propagate to the caller.
	    """
	    speech_engine = sr.Recognizer()
	    with sr.AudioFile(audio_path) as clip:
	        recording = speech_engine.record(clip)
	    return speech_engine.recognize_google(recording)

	# Sentiment classification of a transcript
	def classify_transcription(transcription):
	    """Label ``transcription`` as ``'positive'`` or ``'negative'``.

	    The text is tokenised (truncated to at most 512 tokens) and run through
	    the module-level ``model`` with gradient tracking disabled. The index of
	    the largest logit decides the label: index 1 maps to 'positive', any
	    other index to 'negative'.
	    """
	    encoded = tokenizer(
	        transcription,
	        truncation=True,
	        padding=True,
	        max_length=512,
	        return_tensors="pt",
	    )
	    with torch.no_grad():
	        scores = model(**encoded).logits
	    winner = torch.argmax(scores, dim=1).item()
	    if winner == 1:
	        return 'positive'
	    return 'negative'

	# End-to-end pipeline: URL in, transcript and sentiment out
	def classify_youtube_video(video_url):
	    """Download, convert, transcribe and classify the video at ``video_url``.

	    Returns:
	        A ``(transcription, sentiment)`` tuple, where ``sentiment`` is the
	        label produced by ``classify_transcription``.
	    """
	    wav_file = mp4_to_wav(download_youtube_video(video_url))
	    spoken_text = transcribe_audio(wav_file)
	    return spoken_text, classify_transcription(spoken_text)

	# Build the web UI: one text box in (the video URL), two text boxes out
	# (the transcript and the sentiment label).
	interface = gr.Interface(
	    title="YouTube Video Sentiment Classifier",
	    description="Input a YouTube video URL to transcribe the audio and classify the sentiment as positive or negative.",
	    fn=classify_youtube_video,
	    inputs=["text"],
	    outputs=["text", "text"],
	)

	# Start serving the UI
	interface.launch(inline=False)