# Spaces: urjob/test (Sleeping)
# File size: 4,692 Bytes
# 8ae78b0

import os
import sys
import time
import json
import argparse

# Add the project root to the Python path
sys.path.insert(0, os.path.abspath(os.path.dirname(__file__)))

# Now import from the behavior_backend package
from behavior_backend.app.services.processing.video_processor import VideoProcessor

def test_video_processor(video_path, language='en', service='whisper', backend='mediapipe', frame_rate=1):
    """
    Run the video processor end-to-end with a chosen transcription service.

    The processor's ``speech_service.process_video_speech`` is wrapped so that
    ``service`` is forwarded as its third positional argument. The transcript
    and analysis returned by ``process_video`` are then written to
    ``<video stem>_<service>_transcript.txt`` and
    ``<video stem>_<service>_analysis.json`` (relative paths, so they land in
    the current working directory), and short previews are printed.

    Args:
        video_path: Path to the video file
        language: Language code
        service: Transcription service to use ('whisper', 'groq', 'google_cloud', 'openai_whisper')
        backend: Backend to use for face detection
        frame_rate: Process every nth frame

    Returns:
        True when processing and reporting finished, False when an exception
        was raised during processing or reporting (the error is printed).
        Errors raised while constructing the processor are not caught.
    """
    print("\n=== Testing Video Processor ===")
    print(f"Video: {video_path}")
    print(f"Language: {language}")
    print(f"Transcription Service: {service}")
    print(f"Face Detection Backend: {backend}")
    print(f"Frame Rate: {frame_rate}")

    # Initialize the video processor
    processor = VideoProcessor()

    # Check available cloud services
    available_services = list(processor.speech_service.cloud_transcription_service.available_recognizers.keys())
    print(f"Available cloud services: {', '.join(available_services)}")

    # process_video() is not given a service argument here, so wrap the speech
    # step to inject the requested service. Parameter names are kept so both
    # positional and keyword calls with (video_path, language) still work.
    original_process_video_speech = processor.speech_service.process_video_speech

    def _process_with_service(video_path, language):
        return original_process_video_speech(video_path, language, service)

    processor.speech_service.process_video_speech = _process_with_service

    # Process the video
    start_time = time.time()
    try:
        transcript, analysis_json = processor.process_video(
            video_path=video_path,
            frame_rate=frame_rate,
            backend=backend,
            language=language,
            generate_annotated_video=False
        )
        end_time = time.time()

        print(f"\nProcessing completed in {end_time - start_time:.2f} seconds")
        print(f"Transcript length: {len(transcript)} characters")

        # Strip only the final extension so names such as "my.talk.mp4" keep
        # their full stem ("my.talk") instead of collapsing to "my".
        base_name = os.path.splitext(os.path.basename(video_path))[0]

        # Save transcript; explicit UTF-8 so non-ASCII transcripts do not
        # depend on the platform's default encoding.
        transcript_file = f"{base_name}_{service}_transcript.txt"
        with open(transcript_file, 'w', encoding='utf-8') as f:
            f.write(transcript)
        print(f"Transcript saved to {transcript_file}")

        # Save analysis
        analysis_file = f"{base_name}_{service}_analysis.json"
        with open(analysis_file, 'w', encoding='utf-8') as f:
            f.write(analysis_json)
        print(f"Analysis saved to {analysis_file}")

        # Print transcript preview
        print("\nTranscript preview (first 500 characters):")
        print("-" * 80)
        if len(transcript) > 500:
            print(transcript[:500] + "...")
        else:
            print(transcript)
        print("-" * 80)

        # Print analysis preview
        print("\nAnalysis preview:")
        print("-" * 80)
        analysis = json.loads(analysis_json)
        if 'summary' in analysis:
            print(f"Summary: {analysis['summary']}")
        if 'key_points' in analysis:
            key_points = analysis['key_points']
            print("\nKey Points:")
            for point in key_points[:3]:  # Show first 3 key points
                print(f"- {point}")
            if len(key_points) > 3:
                print(f"... and {len(key_points) - 3} more key points")
        print("-" * 80)

        return True
    except Exception as e:
        # Top-level boundary of a manual test script: report and signal
        # failure through the return value instead of crashing.
        print(f"Error: {str(e)}")
        return False

def main():
    """
    Parse command-line arguments and run the video processor test.

    Exits the process with status 1 when ``test_video_processor`` reports
    failure, so shells and CI jobs can detect it. Returns normally (exit
    status 0 when run as a script) on success.
    """
    parser = argparse.ArgumentParser(description='Test video processor with different transcription services')
    parser.add_argument('video_path', help='Path to the video file')
    parser.add_argument('--language', '-l', default='en', help='Language code (default: en)')
    parser.add_argument('--service', '-s', default='whisper',
                        choices=['whisper', 'groq', 'google_cloud', 'openai_whisper'],
                        help='Transcription service to use (default: whisper)')
    parser.add_argument('--backend', '-b', default='mediapipe',
                        choices=['mediapipe', 'ssd', 'mtcnn'],
                        help='Backend to use for face detection (default: mediapipe)')
    parser.add_argument('--frame-rate', '-f', type=int, default=1,
                        help='Process every nth frame (default: 1)')

    args = parser.parse_args()
    succeeded = test_video_processor(
        args.video_path,
        args.language,
        args.service,
        args.backend,
        args.frame_rate
    )
    # Previously the result was discarded and the script always exited 0.
    if not succeeded:
        sys.exit(1)

# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()