File size: 3,904 Bytes
6ac638f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7923455
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
"""
Video processing module for Study Companion
Handles video upload, audio extraction, transcription, and chat functionality
"""

import streamlit as st
import tempfile
import os
from moviepy import VideoFileClip
from openai import OpenAI

# Initialize the shared OpenAI client used by the transcription, summary and
# chat helpers below. The key is read from the OPENAI_API_KEY environment
# variable when this module is imported; os.getenv returns None if it is unset.
api_key = os.getenv("OPENAI_API_KEY")
client = OpenAI(api_key=api_key)


# ---------------------------
# Core Video Processing Functions
# ---------------------------

def extract_audio(video_path: str) -> "str | None":
    """Extract the audio track of a video file and save it as an MP3.

    The MP3 is written next to the video, using the video's path with its
    final extension swapped for ``.mp3``.

    Args:
        video_path: Path to the video file on disk.

    Returns:
        The path of the MP3 file that was written, or ``None`` if extraction
        failed (the error is reported to the user via ``st.error``).
    """
    # Swap only the trailing extension. Chained ``str.replace`` calls would
    # also rewrite matching text inside directory names, and would leave the
    # path unchanged for any unlisted extension (e.g. ".MP4"), making the
    # audio output path identical to the video path.
    root, _ext = os.path.splitext(video_path)
    audio_path = root + ".mp3"
    if audio_path == video_path:
        # Input already ends in ".mp3"; never write over the source file.
        audio_path = root + "_audio.mp3"

    try:
        clip = VideoFileClip(video_path)
        try:
            if clip.audio is None:
                # Give a clear message instead of an opaque attribute error.
                st.error("Error extracting audio: the video has no audio track.")
                return None
            clip.audio.write_audiofile(audio_path, codec='mp3', logger=None)
        finally:
            # Always release the clip, even when writing the audio fails.
            clip.close()
        return audio_path
    except Exception as e:
        st.error(f"Error extracting audio: {e}")
        return None


def transcribe_audio(audio_path: str) -> str:
    """Send an audio file to the ``whisper-1`` transcription model.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        The transcribed text, or an empty string if anything fails (the
        error is shown to the user via ``st.error``).
    """
    try:
        with open(audio_path, "rb") as handle:
            result = client.audio.transcriptions.create(
                file=handle,
                model="whisper-1",
            )
        text = result.text
    except Exception as err:
        st.error(f"Error transcribing audio: {err}")
        return ""
    return text


def generate_video_summary(transcript_text: str) -> str:
    """Ask the chat model for a concise, student-oriented summary.

    Args:
        transcript_text: Full transcript of the video.

    Returns:
        The model's summary with surrounding whitespace stripped.
    """
    instructions = (
        "Summarize the following video transcript in a concise manner, "
        "highlighting the key points that a student should know.\n\n"
        "Feel free to use bullet points, bold, italics and headers to emphasize key points where necessary.\n\n"
    )
    system_message = {
        "role": "system",
        "content": "You are an educational assistant that creates clear, structured summaries.",
    }
    user_message = {
        "role": "user",
        "content": f"{instructions}Transcript:\n\n{transcript_text}",
    }

    response = client.chat.completions.create(
        messages=[system_message, user_message],
        model="gpt-4o-mini",
    )
    summary = response.choices[0].message.content
    return summary.strip()


def chat_with_video(transcript_text: str, conversation_history: list, user_query: str) -> str:
    """Answer a user question about the video, using its transcript as context.

    The transcript and the question are packed into one new user message that
    is appended to a copy of the history; ``conversation_history`` itself is
    not modified.

    Args:
        transcript_text: Full transcript of the video.
        conversation_history: Earlier chat messages as role/content dicts.
        user_query: The question to answer.

    Returns:
        The model's reply with surrounding whitespace stripped.
    """
    question = {
        "role": "user",
        "content": f"Based on the following video transcript:\n\n{transcript_text}\n\nQuestion: {user_query}",
    }
    response = client.chat.completions.create(
        messages=conversation_history + [question],
        model="gpt-4o-mini",
    )
    reply = response.choices[0].message.content
    return reply.strip()


def _remove_temp_file(path: str) -> None:
    """Delete a temporary file, tolerating the case where it cannot be removed."""
    try:
        os.unlink(path)
    except OSError:
        # Best-effort cleanup: a missing or locked temp file is not fatal.
        pass


def process_uploaded_video(uploaded_video) -> tuple:
    """
    Process an uploaded video file: extract audio and transcribe.

    Args:
        uploaded_video: Uploaded file object exposing ``size`` (in bytes)
            and ``read()``.

    Returns:
        (transcript_text, video_path) on success, where video_path is the
        temporary on-disk copy of the upload; the caller is responsible for
        deleting it when done. (None, None) on error, in which case every
        temporary file created here has already been removed.
    """
    max_bytes = 200 * 1024 * 1024  # 200MB upload limit

    if uploaded_video.size > max_bytes:
        st.error("File size exceeds 200MB. Please upload a smaller video.")
        return None, None

    # Save uploaded video to a temporary file that outlives this function
    # (delete=False) because the path is handed back to the caller.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp:
        tmp.write(uploaded_video.read())
        video_path = tmp.name

    # Extract audio
    with st.spinner("🎵 Extracting audio from video..."):
        audio_path = extract_audio(video_path)

    if not audio_path:
        # The caller never learns the temp path on failure, so remove the
        # video here instead of leaking it.
        _remove_temp_file(video_path)
        return None, None

    # Transcribe audio; the intermediate audio file is removed no matter
    # how transcription ends.
    try:
        with st.spinner("📝 Transcribing audio... This may take a few minutes."):
            transcript_text = transcribe_audio(audio_path)
    finally:
        _remove_temp_file(audio_path)

    if not transcript_text:
        st.error("Failed to transcribe audio.")
        _remove_temp_file(video_path)
        return None, None

    return transcript_text, video_path