Spaces:
Runtime error
Runtime error
| import time | |
| import os | |
| import requests | |
| import subprocess | |
| import tempfile | |
| import gradio as gr | |
| import openai | |
| import whisper | |
| # Selenium imports | |
| from selenium import webdriver | |
| from selenium.webdriver.chrome.options import Options | |
| from selenium.webdriver.chrome.service import Service | |
def is_direct_video_url(url: str):
    """
    Naive check if the user input is a direct link to a video file.

    The extension is looked for both on the raw string and on the URL's path
    component, so a query string or fragment after the file name
    (e.g. ``clip.mp4?token=abc`` or ``clip.mp4#t=10``) does not hide it.
    You can expand the extension list as needed.

    Args:
        url: The user-supplied URL (or any string). ``None`` and the empty
            string are accepted and treated as "not a video".

    Returns:
        True if the URL appears to point at a video file, False otherwise.
    """
    from urllib.parse import urlparse

    video_extensions = (".mp4", ".webm", ".mov", ".avi", ".mkv")
    if not url:
        return False

    lowered = url.lower()
    # Original behaviour: the whole string ends with a known extension.
    if lowered.endswith(video_extensions):
        return True

    # Ignore "?query" and "#fragment" parts by looking at the path only.
    try:
        path = urlparse(lowered).path
    except ValueError:
        # Malformed URL (e.g. unbalanced IPv6 brackets): not a direct link.
        return False
    return path.endswith(video_extensions)
def get_video_url(page_url):
    """
    Uses Selenium in headless mode to load the page and extract the video URL
    from a <video> element.

    The URL is taken from the element's ``src``, then its ``currentSrc``, and
    finally from the first nested <source> element that carries a ``src``.
    Adjust the element-finding logic if the video is embedded differently.

    Args:
        page_url: URL of the page that embeds the video.

    Returns:
        The video URL found on the page.

    Raises:
        Exception: If no <video> element can be located, or if the element
            exposes no source URL.
    """
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    # Location of Chromium browser
    chrome_options.binary_location = "/usr/bin/chromium-browser"
    # Location of the matching Chromedriver
    service = Service("/usr/bin/chromedriver")
    driver = webdriver.Chrome(service=service, options=chrome_options)

    # try/finally guarantees the browser process is shut down even when the
    # page load itself fails, not only when the element lookup fails.
    try:
        driver.get(page_url)
        # Wait for JavaScript to render the video element
        time.sleep(5)
        try:
            video_element = driver.find_element("tag name", "video")
            video_url = (
                video_element.get_attribute("src")
                or video_element.get_attribute("currentSrc")
            )
            if not video_url:
                # <video><source src="..."></video> leaves the video's own
                # src empty; take the first nested source that has one.
                for source_element in video_element.find_elements("tag name", "source"):
                    video_url = source_element.get_attribute("src")
                    if video_url:
                        break
        except Exception as e:
            raise Exception(
                "Could not locate a <video> element. The page structure may differ: " + str(e)
            ) from e
    finally:
        driver.quit()

    if not video_url:
        # Fail here with a clear message instead of handing None/"" to the
        # downloader.
        raise Exception("Found a <video> element but it has no source URL.")
    return video_url
def download_video(video_url, output_path, timeout=30):
    """
    Downloads the video from the extracted URL (or direct link) to a local file.

    Args:
        video_url: URL of the video to fetch.
        output_path: Path of the local file to write (opened in binary mode,
            overwriting any existing content).
        timeout: Seconds passed to ``requests.get`` as its ``timeout``
            argument, so an unresponsive server cannot block forever.

    Returns:
        ``output_path``, once the whole body has been written.

    Raises:
        Exception: If the server answers with a status code other than 200.
        requests.RequestException: On connection problems or timeouts.
    """
    # The context manager releases the streamed connection on every exit
    # path, including the error raise below.
    with requests.get(video_url, stream=True, timeout=timeout) as response:
        if response.status_code != 200:
            raise Exception("Failed to download video; status code: " + str(response.status_code))
        with open(output_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:  # skip keep-alive chunks
                    f.write(chunk)
    return output_path
def extract_audio(video_file, audio_file):
    """
    Uses FFmpeg to extract the audio track from the video.

    The audio is written as 16-bit PCM, 44.1 kHz, stereo. An existing
    ``audio_file`` is overwritten.

    Args:
        video_file: Path of the input video.
        audio_file: Path of the audio file to create.

    Returns:
        ``audio_file``.

    Raises:
        Exception: If FFmpeg exits with a non-zero status; the message
            includes the tail of FFmpeg's stderr output.
    """
    command = [
        "ffmpeg",
        "-y",                    # Overwrite the output instead of prompting
        "-i", video_file,        # Input video file
        "-vn",                   # Disable video output
        "-acodec", "pcm_s16le",  # Audio codec for WAV
        "-ar", "44100",          # Sample rate
        "-ac", "2",              # Stereo channels
        audio_file,
    ]
    try:
        subprocess.run(
            command,
            check=True,
            stdin=subprocess.DEVNULL,  # never wait on interactive input
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
    except subprocess.CalledProcessError as e:
        # FFmpeg reports the actual cause on stderr; keep the last part of it.
        details = (e.stderr or b"").decode("utf-8", errors="replace").strip()
        message = "FFmpeg failed to extract audio: " + str(e)
        if details:
            message += "\n" + details[-1000:]
        raise Exception(message) from e
    return audio_file
# Whisper models already loaded by transcribe_audio, keyed by model name.
_WHISPER_MODELS = {}


def transcribe_audio(audio_file, model_name="base"):
    """
    Transcribes the provided audio file with a Whisper model.

    The model is loaded on first use and kept in a module-level cache, so
    later calls with the same ``model_name`` do not load it again.

    Args:
        audio_file: Path of the audio file to transcribe.
        model_name: Name passed to ``whisper.load_model``; defaults to
            ``"base"``.

    Returns:
        The ``"text"`` entry of the transcription result.
    """
    model = _WHISPER_MODELS.get(model_name)
    if model is None:
        model = whisper.load_model(model_name)
        _WHISPER_MODELS[model_name] = model
    result = model.transcribe(audio_file)
    return result["text"]
def summarize_text(transcription, openai_api_key, model_name="text-davinci-003"):
    """
    Asks the OpenAI completion endpoint for a concise summary of a transcription.

    Args:
        transcription: Transcribed text of the video.
        openai_api_key: API key assigned to ``openai.api_key`` before the call.
        model_name: Engine passed to the completion request.

    Returns:
        The text of the first completion choice, stripped of surrounding
        whitespace.
    """
    # NOTE(review): `openai.Completion` and the "text-davinci-003" default
    # look like the legacy completions API -- confirm both are still
    # available for the installed SDK version and account.
    openai.api_key = openai_api_key

    summary_prompt = (
        f"Please summarize the following transcription of a video lecture concisely:\n\n{transcription}\n\nSummary:"
    )
    request_options = {
        "engine": model_name,
        "prompt": summary_prompt,
        "max_tokens": 150,
        "temperature": 0.5,
    }
    completion = openai.Completion.create(**request_options)

    first_choice = completion.choices[0]
    return first_choice.text.strip()
| def process_page(page_url, openai_api_key): | |
| """ | |
| Main function that: | |
| 1. Checks if user input is direct video URL or a page. | |
| 2. Scrapes for video URL if needed. | |
| 3. Downloads the video. | |
| 4. Extracts the audio using FFmpeg. | |
| 5. Transcribes audio with Whisper. | |
| 6. Summarizes via OpenAI. | |
| Returns (video_url, transcription, summary). | |
| """ | |
| try: | |
| if is_direct_video_url(page_url): | |
| # The user provided a direct video link; no scraping needed | |
| video_url = page_url | |
| else: | |
| # The user provided a page URL, | |