# File size: 5,055 Bytes
# a4d5e9e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import time

def process_video(video_path, session_id, models, conn):
    """Caption sampled frames of a video and transcribe its audio into the database.

    Frames are sampled with ``extract_frames(video_path, interval=0.5)``. Each
    frame's caption is inserted into the ``captions`` table and committed;
    then, if ``extract_audio`` yields any audio, its transcription is inserted
    into ``transcriptions`` and committed.

    Failures are reported with ``print`` and never propagate to the caller.
    Inserts that had not been committed when the failure happened are rolled
    back, so a later commit on the same connection cannot persist a partial
    result.

    Args:
        video_path: Location of the video; passed unchanged to
            ``extract_frames`` and ``extract_audio``.
        session_id: Value stored in the ``session_id`` column of every row.
        models: Opaque object forwarded to ``generate_caption`` and
            ``transcribe_audio``.
        conn: Database connection providing ``cursor()``, ``commit()`` and
            ``rollback()``. The statements use ``?`` placeholders
            (presumably a ``sqlite3`` connection -- confirm against caller).

    Returns:
        None.
    """
    try:
        # Imported lazily so a missing project module is reported through the
        # same print-based error path as any other failure.
        from captions import extract_frames, generate_caption
        from audio import extract_audio, transcribe_audio

        print("Extracting frames...")
        frames, timestamps = extract_frames(video_path, interval=0.5)

        if not frames:
            print("No frames could be extracted from the video.")
            return

        total = len(frames)
        print(f"Generating captions for {total} frames...")
        cursor = conn.cursor()

        for i, (frame, timestamp) in enumerate(zip(frames, timestamps)):
            caption = generate_caption(frame, models)
            cursor.execute(
                "INSERT INTO captions (session_id, timestamp, caption) VALUES (?, ?, ?)",
                (session_id, timestamp, caption),
            )

            # Progress report on the 1st, 11th, 21st, ... frame.
            if i % 10 == 0:
                print(f"Generating captions... {i+1}/{total}")

        # Captions are committed before the audio stage so that an audio
        # failure does not discard them.
        conn.commit()

        print("Extracting and transcribing audio...")
        audio, sr = extract_audio(video_path)

        if audio is not None and len(audio) > 0:
            transcription = transcribe_audio(audio, sr, models)
            cursor.execute(
                "INSERT INTO transcriptions (session_id, transcription) VALUES (?, ?)",
                (session_id, transcription),
            )
            conn.commit()
        else:
            print("No audio found in the video or audio extraction failed.")

        print("Processing complete!")

    except Exception as e:
        print(f"Error processing video: {str(e)}")
        # Drop whatever was inserted but not yet committed; otherwise the next
        # commit on this connection would silently persist a partial result.
        try:
            conn.rollback()
        except Exception as rollback_error:
            print(f"Rollback failed: {str(rollback_error)}")

def process_video_with_fps(video_path, session_id, models, conn, fps=5):
    """Caption a video at a chosen sampling rate and transcribe its audio.

    Frames are sampled with ``extract_frames_with_fps`` using an interval of
    ``1.0 / fps``. Captions are produced with ``batch_generate_captions``
    (batch size 4); if that call fails or does not return exactly one caption
    per frame, every frame is captioned individually with
    ``generate_caption`` instead. Captions are inserted into ``captions`` and
    committed, then the audio transcription (when audio is available) is
    inserted into ``transcriptions`` and committed.

    Errors never propagate to the caller. On failure the uncommitted inserts
    are rolled back and, provided no captions have been committed yet, the
    work is retried with ``process_video``. Once captions are committed the
    retry is skipped, because ``process_video`` would caption the video again
    and store duplicate rows.

    Args:
        video_path: Location of the video; passed unchanged to the frame and
            audio extraction helpers.
        session_id: Value stored in the ``session_id`` column of every row.
        models: Opaque object forwarded to the caption and transcription
            helpers.
        conn: Database connection providing ``cursor()``, ``commit()`` and
            ``rollback()``. The statements use ``?`` placeholders
            (presumably a ``sqlite3`` connection -- confirm against caller).
        fps: Frames to sample per second; must be positive. A non-positive
            value is reported and handled through the ``process_video``
            fallback.

    Returns:
        None.
    """
    insert_caption_sql = (
        "INSERT INTO captions (session_id, timestamp, caption) VALUES (?, ?, ?)"
    )
    # Tracks whether the caption rows of this run are already durable, which
    # decides whether re-running the whole pipeline is safe.
    captions_committed = False

    try:
        from captions import extract_frames_with_fps, generate_caption, batch_generate_captions
        from audio import extract_audio, transcribe_audio

        # Reject 0 (division by zero) and negatives (negative interval) with a
        # clear message instead of letting them surface later.
        if fps <= 0:
            raise ValueError(f"fps must be a positive number, got {fps!r}")
        interval = 1.0 / fps

        print(f"Extracting frames at {fps} FPS (interval: {interval:.2f}s)...")
        frames, timestamps = extract_frames_with_fps(video_path, interval=interval)

        if not frames:
            print("No frames could be extracted from the video.")
            return

        total = len(frames)
        print(f"Generating captions for {total} frames...")
        cursor = conn.cursor()

        # Only caption *generation* is guarded here. Keeping the inserts out
        # of this try means a database error cannot trigger the individual
        # path and insert the same frames a second time.
        try:
            captions = list(batch_generate_captions(frames, models, batch_size=4))
            if len(captions) != total:
                # zip() below would silently drop frames on a short result.
                raise ValueError(
                    f"expected {total} captions from batch processing, got {len(captions)}"
                )
        except Exception as batch_error:
            print("Batch processing failed, using individual processing...")
            print(f"Batch processing error: {str(batch_error)}")
            captions = None

        if captions is not None:
            for i, (timestamp, caption) in enumerate(zip(timestamps, captions)):
                cursor.execute(insert_caption_sql, (session_id, timestamp, caption))

                if i % 10 == 0:
                    print(f"Inserting captions... {i+1}/{total}")
        else:
            for i, (frame, timestamp) in enumerate(zip(frames, timestamps)):
                caption = generate_caption(frame, models)
                cursor.execute(insert_caption_sql, (session_id, timestamp, caption))

                if i % 10 == 0:
                    print(f"Generating captions... {i+1}/{total}")

        conn.commit()
        captions_committed = True

        print("Extracting and transcribing audio...")
        audio, sr = extract_audio(video_path)

        if audio is not None and len(audio) > 0:
            transcription = transcribe_audio(audio, sr, models)
            cursor.execute(
                "INSERT INTO transcriptions (session_id, transcription) VALUES (?, ?)",
                (session_id, transcription),
            )
            conn.commit()
        else:
            print("No audio found in the video or audio extraction failed.")

        print("Processing complete!")

    except Exception as e:
        print(f"Error processing video with FPS: {str(e)}")

        # Discard partial, uncommitted inserts so that the fallback's commit
        # cannot persist them next to its own rows.
        try:
            conn.rollback()
        except Exception as rollback_error:
            print(f"Rollback failed: {str(rollback_error)}")

        if captions_committed:
            # The failure happened after the captions were saved; running the
            # original pipeline now would store every caption twice.
            print("Captions were already saved; skipping fallback to avoid duplicate captions.")
            return

        print("Falling back to original processing...")
        process_video(video_path, session_id, models, conn)