# https://www.youtube.com/watch?v=SYHPQ0rXzWM

import yt_dlp
import gradio as gr
from transformers import BertTokenizer, BertForSequenceClassification
import torch
import os
# Make sure the working directories used by the app exist before anything
# tries to write into them.
for _directory in ("Audios", "logs", "results", "Youtube Videos"):
    os.makedirs(_directory, exist_ok=True)

# Load the BERT sequence-classification model and its tokenizer.
#
# NOTE: the default 'bert-base-uncased' is the plain pre-trained checkpoint,
# not a fine-tuned one: its classification head is freshly initialised, so the
# predicted sentiment is not meaningful until a fine-tuned checkpoint is used.
# Set the SENTIMENT_MODEL environment variable to a fine-tuned model directory
# or hub id (containing both model and tokenizer files) to override it.
MODEL_NAME = os.environ.get('SENTIMENT_MODEL', 'bert-base-uncased')
model = BertForSequenceClassification.from_pretrained(MODEL_NAME)
tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)
model.eval()  # inference mode: disables dropout

import yt_dlp
import random

# Pool of browser User-Agent strings (desktop Safari, desktop Chrome, mobile
# Chrome). download_youtube_video() picks one at random on every call.
user_agents = [
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.2 Safari/605.1.15',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36',
    'Mozilla/5.0 (Linux; Android 10; SM-G981B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Mobile Safari/537.36'
]
def download_youtube_video(video_url, output_path='Youtube Videos/Recent.mp4'):
    """Download the best available audio stream of a video with yt-dlp.

    Args:
        video_url: URL of the video to fetch.
        output_path: Path the download is written to (passed to yt-dlp as
            its output template).

    Returns:
        ``output_path``, unchanged.

    Raises:
        yt_dlp.utils.DownloadError: If yt-dlp fails to fetch the video.
    """
    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': output_path,
        # The output path is the same for every request. Without forcing an
        # overwrite, yt-dlp finds the file left by the previous request, skips
        # the download, and the stale audio is returned for the new URL.
        'overwrites': True,
        'geo_bypass': True,  # Bypass geo-restrictions
        # NOTE(review): asks extractors to use HTTP instead of HTTPS where
        # supported; kept as-is, but confirm this is really wanted.
        'prefer_insecure': True,
        # YoutubeDL has no 'user_agent' option (unknown keys are ignored), so
        # the rotated User-Agent has to be sent through 'http_headers'.
        'http_headers': {'User-Agent': random.choice(user_agents)},
        'http_chunk_size': 16 * 1024,  # Set HTTP chunk size to 16KB
        # The previous 'no_check_certificate' and 'cookies' keys were dropped:
        # neither is a YoutubeDL option name ('nocheckcertificate' and
        # 'cookiefile' are), so they never had any effect.
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([video_url])

    return output_path

from pydub import AudioSegment

def mp4_to_wav(mp4_path):
    """Decode the media file at ``mp4_path`` and re-export it as WAV.

    Returns the path of the exported file, which is always
    ``Audios/pydub_output.wav``.
    """
    wav_path = "Audios/pydub_output.wav"
    # pydub loads the source audio, then writes it back out in WAV format.
    AudioSegment.from_file(mp4_path).export(wav_path, format="wav")
    return wav_path

import speech_recognition as sr

def transcribe_audio(audio_path):
    """Transcribe the audio file at ``audio_path`` and return the text.

    The whole file is read into memory and sent to the recognizer's
    ``recognize_google`` backend in a single request. Any exception raised by
    the recognizer propagates to the caller.
    """
    engine = sr.Recognizer()
    with sr.AudioFile(audio_path) as wav_source:
        # record() with no duration reads the source to its end.
        recording = engine.record(wav_source)
    return engine.recognize_google(recording)

# Function to classify the transcription
def classify_transcription(transcription):
    """Classify ``transcription`` with the module-level BERT model.

    The text is tokenized (truncated to at most 512 tokens) and run through
    the model without gradient tracking. Returns ``'positive'`` when the
    highest-scoring class index is 1, otherwise ``'negative'``.
    """
    encoded = tokenizer(
        transcription,
        return_tensors="pt",
        max_length=512,
        truncation=True,
        padding=True,
    )
    with torch.no_grad():
        scores = model(**encoded).logits
        label_index = torch.argmax(scores, dim=1).item()
    if label_index == 1:
        return 'positive'
    return 'negative'

# Complete pipeline function
def classify_youtube_video(video_url):
    """Run the full pipeline for one video URL.

    Steps: download the video, convert it to WAV, transcribe the audio, and
    classify the transcription.

    Returns a ``(transcription, sentiment)`` tuple.
    """
    wav_file = mp4_to_wav(download_youtube_video(video_url))
    spoken_text = transcribe_audio(wav_file)
    return spoken_text, classify_transcription(spoken_text)

# Create Gradio Interface
# One text box in (the video URL), two text boxes out (transcription and
# sentiment), wired to the full pipeline function.
interface = gr.Interface(
    title="YouTube Video Sentiment Classifier",
    description="Input a YouTube video URL to transcribe the audio and classify the sentiment as positive or negative.",
    fn=classify_youtube_video,
    inputs=["text"],
    outputs=["text", "text"],
)

# Start the web app.
interface.launch(inline=False)