File size: 4,242 Bytes
60616ea
 
 
 
 
 
798778b
 
 
 
 
60616ea
 
798778b
60616ea
e17c4f2
 
 
 
 
 
 
 
60616ea
 
 
 
 
 
 
 
967c795
e17c4f2
967c795
e17c4f2
967c795
 
798778b
60616ea
 
967c795
60616ea
 
 
 
 
 
 
 
 
 
 
 
 
 
e17c4f2
60616ea
 
 
 
 
 
 
 
 
 
 
 
 
e17c4f2
60616ea
 
 
 
e17c4f2
60616ea
e17c4f2
 
60616ea
 
 
 
 
 
 
 
 
 
 
 
 
 
e17c4f2
60616ea
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e17c4f2
60616ea
 
 
e17c4f2
 
 
 
 
 
 
60616ea
e17c4f2
60616ea
 
e17c4f2
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import time
import os
import requests
import subprocess
import tempfile

import gradio as gr
import openai
import whisper

# Selenium imports
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service

def is_direct_video_url(url: str):
    """
    Naive check if the user input is a direct link to a video file.

    Only the path component of the URL is inspected, so links that carry a
    query string or fragment (e.g. ``clip.mp4?token=abc#t=10``) are still
    recognised. The comparison is case-insensitive and ignores surrounding
    whitespace. Expand ``video_extensions`` as needed.

    Args:
        url: The user-supplied URL (or bare path).

    Returns:
        True if the path ends with a known video extension, otherwise False.
        Empty or missing input yields False.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import section.
    from urllib.parse import urlsplit

    video_extensions = (".mp4", ".webm", ".mov", ".avi", ".mkv")
    if not url:
        return False

    candidate = url.strip()
    try:
        path = urlsplit(candidate).path
    except ValueError:
        # Malformed URL (e.g. a broken IPv6 literal): fall back to the raw text.
        path = candidate
    return path.lower().endswith(video_extensions)

def get_video_url(page_url):
    """
    Uses Selenium in headless mode to load the page and extract the video URL from a <video> element.
    Adjust the element-finding logic if the video is embedded differently.

    The URL is taken from the <video> tag's ``src`` attribute; if that is
    empty, the first nested <source> tag with a ``src`` is used instead.

    Args:
        page_url: Address of the web page that embeds the video.

    Returns:
        The video URL found on the page.

    Raises:
        Exception: If no <video> element can be located, or if the element
            exposes no source URL. Errors raised while starting the browser
            or loading the page propagate unchanged.
    """
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    # Location of Chromium browser
    chrome_options.binary_location = "/usr/bin/chromium-browser"
    # Location of the matching Chromedriver
    service = Service("/usr/bin/chromedriver")

    driver = webdriver.Chrome(service=service, options=chrome_options)
    try:
        driver.get(page_url)

        # Wait for JavaScript to render the video element
        time.sleep(5)

        try:
            video_element = driver.find_element("tag name", "video")
            video_url = video_element.get_attribute("src")
            if not video_url:
                # Many players declare the media on child <source> tags
                # instead of on the <video> tag itself.
                for source_element in video_element.find_elements("tag name", "source"):
                    video_url = source_element.get_attribute("src")
                    if video_url:
                        break
        except Exception as e:
            raise Exception(
                "Could not locate a <video> element. The page structure may differ: " + str(e)
            ) from e
    finally:
        # Always shut the browser down, including when the page load fails,
        # so no headless Chromium process is left behind.
        driver.quit()

    if not video_url:
        raise Exception("Found a <video> element, but it does not expose a source URL.")
    return video_url

def download_video(video_url, output_path, timeout=30):
    """
    Downloads the video from the extracted URL (or direct link) to a local file.

    The body is streamed to disk in 8 KiB chunks so the whole video is never
    held in memory.

    Args:
        video_url: URL of the video to fetch.
        output_path: Local path the video bytes are written to.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``, so a
            stalled server cannot block the caller forever.

    Returns:
        ``output_path``, once the download has completed.

    Raises:
        Exception: If the server answers with a status code other than 200.
            Network errors raised by ``requests`` propagate unchanged.
    """
    # The context manager releases the connection even when an error occurs
    # part-way through the download.
    with requests.get(video_url, stream=True, timeout=timeout) as response:
        if response.status_code != 200:
            raise Exception("Failed to download video; status code: " + str(response.status_code))

        with open(output_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:  # skip empty chunks
                    f.write(chunk)
    return output_path

def extract_audio(video_file, audio_file):
    """
    Uses FFmpeg to extract the audio track from the video.

    The audio is written as 16-bit PCM, 44.1 kHz, stereo. An existing
    ``audio_file`` is overwritten.

    Args:
        video_file: Path of the input video.
        audio_file: Path of the audio file to create.

    Returns:
        ``audio_file``, once FFmpeg has finished successfully.

    Raises:
        Exception: If the ``ffmpeg`` executable cannot be found, or if FFmpeg
            exits with a non-zero status (the tail of its stderr output is
            appended to the message).
    """
    command = [
        "ffmpeg",
        "-y",                     # Overwrite the output instead of prompting/refusing
        "-i", video_file,         # Input video file
        "-vn",                    # Disable video output
        "-acodec", "pcm_s16le",   # Audio codec for WAV
        "-ar", "44100",           # Sample rate
        "-ac", "2",               # Stereo channels
        audio_file,
    ]
    try:
        subprocess.run(
            command,
            check=True,
            stdin=subprocess.DEVNULL,  # FFmpeg must never wait on or consume our stdin
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
    except FileNotFoundError as e:
        raise Exception("FFmpeg executable not found: " + str(e)) from e
    except subprocess.CalledProcessError as e:
        message = "FFmpeg failed to extract audio: " + str(e)
        if e.stderr:
            # FFmpeg reports the actual cause on stderr; keep only the tail so
            # the message stays readable.
            detail = e.stderr.decode("utf-8", errors="replace").strip()
            if detail:
                message += "\n" + detail[-2000:]
        raise Exception(message) from e
    return audio_file

def transcribe_audio(audio_file):
    """
    Transcribes the given audio file with Whisper's "base" model.

    The model is loaded on each call; the "text" entry of the transcription
    result is returned.
    """
    return whisper.load_model("base").transcribe(audio_file)["text"]

def summarize_text(transcription, openai_api_key, model_name="text-davinci-003"):
    """
    Uses the OpenAI API to summarize the transcription.

    Args:
        transcription: Text of the video transcription.
        openai_api_key: API key; assigned to the module-level ``openai.api_key``.
        model_name: Name passed to the completion endpoint as ``engine``.

    Returns:
        The summary text with surrounding whitespace stripped, or an empty
        string when there is nothing to summarize (empty, whitespace-only or
        missing transcription).

    NOTE(review): this relies on the legacy ``openai.Completion`` interface;
    confirm that the pinned SDK version and the default model are still
    supported.
    """
    # Nothing to summarize: skip the API call rather than asking the model to
    # summarize an empty prompt.
    if not transcription or not transcription.strip():
        return ""

    openai.api_key = openai_api_key
    prompt = (
        "Please summarize the following transcription of a video lecture concisely:\n\n"
        f"{transcription}\n\nSummary:"
    )

    response = openai.Completion.create(
        engine=model_name,
        prompt=prompt,
        max_tokens=150,  # upper bound on the length of the generated summary
        temperature=0.5,
    )
    return response.choices[0].text.strip()

def process_page(page_url, openai_api_key):
    """
    Main function that:
      1. Checks if user input is direct video URL or a page.
      2. Scrapes for video URL if needed.
      3. Downloads the video.
      4. Extracts the audio using FFmpeg.
      5. Transcribes audio with Whisper.
      6. Summarizes via OpenAI.
    
    Returns (video_url, transcription, summary).
    """
    try:
        if is_direct_video_url(page_url):
            # The user provided a direct video link; no scraping needed
            video_url = page_url
        else:
            # The user provided a page URL,