File size: 4,692 Bytes
8ae78b0 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 | import os
import sys
import time
import json
import argparse
# Add the project root to the Python path
sys.path.insert(0, os.path.abspath(os.path.dirname(__file__)))
# Now import from the behavior_backend package
from behavior_backend.app.services.processing.video_processor import VideoProcessor
def test_video_processor(video_path, language='en', service='whisper', backend='mediapipe', frame_rate=1):
    """
    Run the video processor on one file with a chosen transcription service.

    The speech service's ``process_video_speech`` is wrapped so that every
    call forwards ``service`` as its third positional argument. The returned
    transcript and analysis JSON are written to the current working directory
    as ``<stem>_<service>_transcript.txt`` and ``<stem>_<service>_analysis.json``
    (``<stem>`` is the video file name without its final extension), and a
    short preview of both is printed.

    Args:
        video_path: Path to the video file
        language: Language code
        service: Transcription service to use ('whisper', 'groq', 'google_cloud', 'openai_whisper')
        backend: Backend to use for face detection
        frame_rate: Process every nth frame

    Returns:
        True if processing, saving and previewing all completed; False if an
        exception was raised during those steps (its message is printed and
        the exception is not re-raised). Errors raised while constructing the
        processor or listing the available services propagate to the caller.
    """
    print("\n=== Testing Video Processor ===")
    print(f"Video: {video_path}")
    print(f"Language: {language}")
    print(f"Transcription Service: {service}")
    print(f"Face Detection Backend: {backend}")
    print(f"Frame Rate: {frame_rate}")

    # Initialize the video processor
    processor = VideoProcessor()

    # Check available cloud services
    speech_service = processor.speech_service
    available_services = list(speech_service.cloud_transcription_service.available_recognizers.keys())
    print(f"Available cloud services: {', '.join(available_services)}")

    # Force the requested transcription service by wrapping the speech entry
    # point. The wrapper keeps the parameter names `video_path` and `language`
    # so callers that pass them by keyword keep working.
    original_process_video_speech = speech_service.process_video_speech

    def _process_with_service(video_path, language):
        return original_process_video_speech(video_path, language, service)

    speech_service.process_video_speech = _process_with_service

    # Process the video
    start_time = time.time()
    try:
        transcript, analysis_json = processor.process_video(
            video_path=video_path,
            frame_rate=frame_rate,
            backend=backend,
            language=language,
            generate_annotated_video=False
        )
        end_time = time.time()
        print(f"\nProcessing completed in {end_time - start_time:.2f} seconds")
        print(f"Transcript length: {len(transcript)} characters")

        # Strip only the final extension: splitting on the first '.' would
        # truncate names such as "my.clip.mp4" to "my" and turn dotfiles
        # into an empty stem.
        base_name = os.path.splitext(os.path.basename(video_path))[0]

        # Save transcript. UTF-8 is explicit so non-ASCII transcripts do not
        # depend on the platform's default encoding.
        transcript_file = f"{base_name}_{service}_transcript.txt"
        with open(transcript_file, 'w', encoding='utf-8') as f:
            f.write(transcript)
        print(f"Transcript saved to {transcript_file}")

        # Save analysis
        analysis_file = f"{base_name}_{service}_analysis.json"
        with open(analysis_file, 'w', encoding='utf-8') as f:
            f.write(analysis_json)
        print(f"Analysis saved to {analysis_file}")

        # Print transcript preview
        print("\nTranscript preview (first 500 characters):")
        print("-" * 80)
        if len(transcript) > 500:
            print(transcript[:500] + "...")
        else:
            print(transcript)
        print("-" * 80)

        # Print analysis preview
        print("\nAnalysis preview:")
        print("-" * 80)
        analysis = json.loads(analysis_json)
        if 'summary' in analysis:
            print(f"Summary: {analysis['summary']}")
        if 'key_points' in analysis:
            key_points = analysis['key_points']
            print("\nKey Points:")
            for point in key_points[:3]:  # Show first 3 key points
                print(f"- {point}")
            if len(key_points) > 3:
                print(f"... and {len(key_points) - 3} more key points")
        print("-" * 80)
        return True
    except Exception as e:
        # Top-level boundary of a manual test run: report the failure and let
        # the caller decide what to do with the False result.
        print(f"Error: {e}")
        return False
def main():
    """
    Parse command-line arguments and run the video processor test.

    Returns:
        0 if ``test_video_processor`` reported success, 1 otherwise, so the
        result can be used directly as the process exit status.
    """
    parser = argparse.ArgumentParser(description='Test video processor with different transcription services')
    parser.add_argument('video_path', help='Path to the video file')
    parser.add_argument('--language', '-l', default='en', help='Language code (default: en)')
    parser.add_argument('--service', '-s', default='whisper',
                        choices=['whisper', 'groq', 'google_cloud', 'openai_whisper'],
                        help='Transcription service to use (default: whisper)')
    parser.add_argument('--backend', '-b', default='mediapipe',
                        choices=['mediapipe', 'ssd', 'mtcnn'],
                        help='Backend to use for face detection (default: mediapipe)')
    parser.add_argument('--frame-rate', '-f', type=int, default=1,
                        help='Process every nth frame (default: 1)')
    args = parser.parse_args()

    succeeded = test_video_processor(
        args.video_path,
        args.language,
        args.service,
        args.backend,
        args.frame_rate
    )
    # Previously the result was discarded and the script always exited 0,
    # which hid failures from shells and CI.
    return 0 if succeeded else 1


if __name__ == "__main__":
    sys.exit(main())