import os
import sys
import time
import json
import argparse

# Add the project root to the Python path
sys.path.insert(0, os.path.abspath(os.path.dirname(__file__)))

# Now import from the behavior_backend package
from behavior_backend.app.services.processing.video_processor import VideoProcessor


def test_video_processor(video_path, language='en', service='whisper', backend='mediapipe', frame_rate=1):
    """
    Run the video processor on one video using a chosen transcription service.

    Prints the run configuration, processes the video, writes the transcript
    and the analysis JSON to the current working directory as
    ``<video stem>_<service>_transcript.txt`` and
    ``<video stem>_<service>_analysis.json``, then prints short previews of
    both.

    Args:
        video_path: Path to the video file
        language: Language code
        service: Transcription service to use ('whisper', 'groq', 'google_cloud', 'openai_whisper')
        backend: Backend to use for face detection
        frame_rate: Process every nth frame

    Returns:
        True if processing and reporting completed, False if any exception
        was raised along the way (the error message is printed).
    """
    print("\n=== Testing Video Processor ===")
    print(f"Video: {video_path}")
    print(f"Language: {language}")
    print(f"Transcription Service: {service}")
    print(f"Face Detection Backend: {backend}")
    print(f"Frame Rate: {frame_rate}")

    # Initialize the video processor
    processor = VideoProcessor()

    # Check available cloud services
    available_services = list(processor.speech_service.cloud_transcription_service.available_recognizers.keys())
    print(f"Available cloud services: {', '.join(available_services)}")

    # process_video() is not given a transcription service below, so wrap the
    # speech step on this processor instance to force the requested service.
    # Parameter names match the original wrapper so keyword calls keep working.
    original_process_video_speech = processor.speech_service.process_video_speech

    def process_video_speech_with_service(video_path, language):
        return original_process_video_speech(video_path, language, service)

    processor.speech_service.process_video_speech = process_video_speech_with_service

    # Process the video
    start_time = time.time()
    try:
        transcript, analysis_json = processor.process_video(
            video_path=video_path,
            frame_rate=frame_rate,
            backend=backend,
            language=language,
            generate_annotated_video=False
        )
        end_time = time.time()

        print(f"\nProcessing completed in {end_time - start_time:.2f} seconds")
        print(f"Transcript length: {len(transcript)} characters")

        # Save results. splitext() removes only the final extension, so a name
        # like "talk.v2.mp4" keeps its full stem ("talk.v2") instead of being
        # cut at the first dot and colliding with other "talk.*" outputs.
        base_name = os.path.splitext(os.path.basename(video_path))[0]

        # Save transcript (explicit UTF-8 so non-ASCII transcripts do not
        # depend on the platform's default encoding)
        transcript_file = f"{base_name}_{service}_transcript.txt"
        with open(transcript_file, 'w', encoding='utf-8') as f:
            f.write(transcript)
        print(f"Transcript saved to {transcript_file}")

        # Save analysis
        analysis_file = f"{base_name}_{service}_analysis.json"
        with open(analysis_file, 'w', encoding='utf-8') as f:
            f.write(analysis_json)
        print(f"Analysis saved to {analysis_file}")

        # Print transcript preview
        print("\nTranscript preview (first 500 characters):")
        print("-" * 80)
        if len(transcript) > 500:
            print(transcript[:500] + "...")
        else:
            print(transcript)
        print("-" * 80)

        # Print analysis preview
        print("\nAnalysis preview:")
        print("-" * 80)
        analysis = json.loads(analysis_json)
        if 'summary' in analysis:
            print(f"Summary: {analysis['summary']}")
        if 'key_points' in analysis:
            key_points = analysis['key_points']
            print("\nKey Points:")
            for point in key_points[:3]:  # Show first 3 key points
                print(f"- {point}")
            if len(key_points) > 3:
                print(f"... and {len(key_points) - 3} more key points")
        print("-" * 80)

        return True
    except Exception as e:
        # Script-level boundary: report the failure and signal it to the caller
        # instead of crashing with a traceback.
        print(f"Error: {str(e)}")
        return False


def main():
    """
    Parse command-line arguments and run the video processor test.

    Returns:
        0 if the run succeeded, 1 if it failed, suitable for ``sys.exit``.
    """
    parser = argparse.ArgumentParser(description='Test video processor with different transcription services')
    parser.add_argument('video_path', help='Path to the video file')
    parser.add_argument('--language', '-l', default='en', help='Language code (default: en)')
    parser.add_argument('--service', '-s', default='whisper',
                        choices=['whisper', 'groq', 'google_cloud', 'openai_whisper'],
                        help='Transcription service to use (default: whisper)')
    parser.add_argument('--backend', '-b', default='mediapipe',
                        choices=['mediapipe', 'ssd', 'mtcnn'],
                        help='Backend to use for face detection (default: mediapipe)')
    parser.add_argument('--frame-rate', '-f', type=int, default=1,
                        help='Process every nth frame (default: 1)')

    args = parser.parse_args()

    succeeded = test_video_processor(
        args.video_path,
        args.language,
        args.service,
        args.backend,
        args.frame_rate
    )
    # Propagate failure through the exit status so shells/CI can detect it.
    return 0 if succeeded else 1


if __name__ == "__main__":
    sys.exit(main())