video-analysis-qa-system / processing.py
ahohpotato's picture
Upload 8 files
a4d5e9e verified
import time
def process_video(video_path, session_id, models, conn):
"""Original process_video function - maintains compatibility"""
try:
# Import your modules
from captions import extract_frames, generate_caption
from audio import extract_audio, transcribe_audio
# Extract frames with default interval
print("Extracting frames...")
frames, timestamps = extract_frames(video_path, interval=0.5)
if not frames:
print("No frames could be extracted from the video.")
return
# Generate captions
print(f"Generating captions for {len(frames)} frames...")
cursor = conn.cursor()
for i, (frame, timestamp) in enumerate(zip(frames, timestamps)):
caption = generate_caption(frame, models)
cursor.execute(
"INSERT INTO captions (session_id, timestamp, caption) VALUES (?, ?, ?)",
(session_id, timestamp, caption)
)
# Update status every 10 frames
if i % 10 == 0:
print(f"Generating captions... {i+1}/{len(frames)}")
conn.commit()
# Extract and transcribe audio
print("Extracting and transcribing audio...")
audio, sr = extract_audio(video_path)
if audio is not None and len(audio) > 0:
transcription = transcribe_audio(audio, sr, models)
cursor.execute(
"INSERT INTO transcriptions (session_id, transcription) VALUES (?, ?)",
(session_id, transcription)
)
conn.commit()
else:
print("No audio found in the video or audio extraction failed.")
print("Processing complete!")
except Exception as e:
print(f"Error processing video: {str(e)}")
def process_video_with_fps(video_path, session_id, models, conn, fps=5):
"""Enhanced process_video function with FPS control"""
try:
# Import your modules
from captions import extract_frames_with_fps, generate_caption, batch_generate_captions
from audio import extract_audio, transcribe_audio
# Calculate interval from FPS
interval = 1.0 / fps
# Extract frames with custom FPS
print(f"Extracting frames at {fps} FPS (interval: {interval:.2f}s)...")
frames, timestamps = extract_frames_with_fps(video_path, interval=interval)
if not frames:
print("No frames could be extracted from the video.")
return
# Generate captions (use batch processing for efficiency)
print(f"Generating captions for {len(frames)} frames...")
cursor = conn.cursor()
# Option 1: Batch processing (more efficient)
try:
captions = batch_generate_captions(frames, models, batch_size=4)
# Insert all captions
for i, (timestamp, caption) in enumerate(zip(timestamps, captions)):
cursor.execute(
"INSERT INTO captions (session_id, timestamp, caption) VALUES (?, ?, ?)",
(session_id, timestamp, caption)
)
if i % 10 == 0:
print(f"Inserting captions... {i+1}/{len(captions)}")
except:
# Option 2: Fallback to individual processing
print("Batch processing failed, using individual processing...")
for i, (frame, timestamp) in enumerate(zip(frames, timestamps)):
caption = generate_caption(frame, models)
cursor.execute(
"INSERT INTO captions (session_id, timestamp, caption) VALUES (?, ?, ?)",
(session_id, timestamp, caption)
)
if i % 10 == 0:
print(f"Generating captions... {i+1}/{len(frames)}")
conn.commit()
# Extract and transcribe audio
print("Extracting and transcribing audio...")
audio, sr = extract_audio(video_path)
if audio is not None and len(audio) > 0:
transcription = transcribe_audio(audio, sr, models)
cursor.execute(
"INSERT INTO transcriptions (session_id, transcription) VALUES (?, ?)",
(session_id, transcription)
)
conn.commit()
else:
print("No audio found in the video or audio extraction failed.")
print("Processing complete!")
except Exception as e:
print(f"Error processing video with FPS: {str(e)}")
# Fallback to original function
print("Falling back to original processing...")
process_video(video_path, session_id, models, conn)