Spaces:

Agents-MCP-Hackathon
/

pdf_explainer

Sleeping

File size: 7,346 Bytes

d1c4aa1

#!/usr/bin/env python3
"""
Quick demonstration script for the Enhanced Chatterbox TTS API
Shows how to use the new full-text endpoints for processing long documents
"""

import requests
import os
from pathlib import Path
from dotenv import load_dotenv

# Load variables from a local .env file (if one is found) into the process
# environment so the os.getenv() lookups in the demos can see the endpoint URLs
load_dotenv()

def demo_full_text_processing():
    """Send a sample multi-paragraph document to the full-text TTS endpoint.

    The endpoint URL is read from the ``FULL_TEXT_TTS_ENDPOINT`` environment
    variable. On an HTTP 200 response the body is written to
    ``demo_output/ai_document_speech.wav`` and the values of the
    ``X-Audio-Duration``, ``X-Chunks-Processed`` and ``X-Total-Characters``
    response headers are printed.

    Returns:
        bool: True when the audio was received and saved; False when the
        endpoint is not configured, the server answers with a non-200
        status, the request times out, or any other error occurs.
    """

    # Sample long text (like from a PDF)
    sample_document = """
    Artificial Intelligence has revolutionized numerous industries and continues to shape our world in unprecedented ways. From healthcare to transportation, AI systems are becoming increasingly sophisticated and capable of performing complex tasks that were once thought to be exclusively human domains.

    In healthcare, AI-powered diagnostic systems can now identify diseases with remarkable accuracy, sometimes surpassing human doctors in specific areas. Machine learning algorithms analyze medical images, predict patient outcomes, and assist in drug discovery processes. This technological advancement has the potential to make healthcare more accessible and effective globally.

    The transportation sector has also witnessed significant AI integration. Autonomous vehicles use computer vision, sensor fusion, and deep learning to navigate complex environments safely. These systems process vast amounts of real-time data to make split-second decisions, potentially reducing traffic accidents and improving transportation efficiency.

    However, with these advancements come important ethical considerations. Issues of privacy, job displacement, and algorithmic bias must be carefully addressed as AI systems become more prevalent in society. It is crucial that we develop AI responsibly, ensuring that these powerful technologies benefit humanity while minimizing potential risks.

    The future of AI holds immense promise, but it requires thoughtful implementation and continuous oversight to ensure that its development aligns with human values and societal needs.
    """

    endpoint = os.getenv("FULL_TEXT_TTS_ENDPOINT")
    if not endpoint:
        print("❌ FULL_TEXT_TTS_ENDPOINT not configured")
        print("Please set the environment variable or update your .env file")
        return False

    # Strip once so the character count reported below describes exactly the
    # text that is sent (the literal above carries leading/trailing whitespace).
    text = sample_document.strip()

    print("🎙️ Enhanced Chatterbox TTS API Demo")
    print("=" * 50)
    print(f"Processing document ({len(text)} characters)...")

    try:
        # Send request to full-text endpoint
        response = requests.post(
            endpoint,
            json={
                "text": text,
                "max_chunk_size": 600,  # Smaller chunks for better processing
                "silence_duration": 0.6,  # Slightly longer pause between chunks
                "fade_duration": 0.2,  # Smooth transitions
                "overlap_sentences": 1,  # Overlap for better continuity
            },
            timeout=180,  # Allow time for processing
        )

        if response.status_code != 200:
            print(f"❌ Request failed with status {response.status_code}")
            print(f"Response: {response.text}")
            return False

        # Save the generated audio
        output_dir = Path("demo_output")
        output_dir.mkdir(exist_ok=True)
        output_file = output_dir / "ai_document_speech.wav"
        output_file.write_bytes(response.content)

        # Extract processing information from headers; any header the server
        # did not send is shown as 'unknown'
        duration = response.headers.get('X-Audio-Duration', 'unknown')
        chunks = response.headers.get('X-Chunks-Processed', 'unknown')
        characters = response.headers.get('X-Total-Characters', 'unknown')

        print("✅ Success! Audio generated and saved")
        # as_posix() keeps the displayed path identical on every platform
        print(f"📁 File: {output_file.as_posix()}")
        print(f"⏱️  Duration: {duration} seconds")
        print(f"🧩 Chunks processed: {chunks}")
        print(f"📝 Characters: {characters}")
        print(f"💾 File size: {output_file.stat().st_size / 1024:.1f} KB")

        return True

    except requests.exceptions.Timeout:
        print("⏰ Request timed out - the document might be too long")
        return False
    except Exception as e:
        # Demo boundary: report any other failure (network, file system, ...)
        # instead of crashing the script
        print(f"❌ Error: {e}")
        return False


def demo_comparison():
    """Time one request against the standard and the full-text endpoints.

    A short text is posted to the URL in ``GENERATE_AUDIO_ENDPOINT`` and a
    medium-length text to the URL in ``FULL_TEXT_TTS_ENDPOINT``; the elapsed
    time of each request is printed, followed by the full-text endpoint's
    ``X-Chunks-Processed`` header when that request succeeded.

    Returns:
        bool: True only when both endpoints answered with HTTP 200; False
        when an endpoint is not configured, either request returned another
        status, or an error occurred.
    """
    # Function-scope import: only this demo needs timing
    import time

    short_text = "This is a short text for comparison."
    medium_text = """
    This is a medium-length text that demonstrates the difference between 
    standard and full-text processing endpoints. The full-text endpoint 
    provides better handling for longer content with intelligent chunking 
    and server-side concatenation.
    """

    standard_endpoint = os.getenv("GENERATE_AUDIO_ENDPOINT")
    fulltext_endpoint = os.getenv("FULL_TEXT_TTS_ENDPOINT")

    if not (standard_endpoint and fulltext_endpoint):
        print("⚠️ Missing endpoint configuration for comparison")
        return False

    print("\n🔍 Comparison Demo")
    print("=" * 30)

    try:
        # Test standard endpoint
        print("Testing standard endpoint...")
        # perf_counter() is monotonic, so the measured interval cannot be
        # skewed by system clock adjustments
        start_time = time.perf_counter()
        response1 = requests.post(
            standard_endpoint,
            json={"text": short_text},
            timeout=30,
        )
        standard_time = time.perf_counter() - start_time

        # Test full-text endpoint
        print("Testing full-text endpoint...")
        start_time = time.perf_counter()
        response2 = requests.post(
            fulltext_endpoint,
            json={"text": medium_text.strip(), "max_chunk_size": 400},
            timeout=60,
        )
        fulltext_time = time.perf_counter() - start_time

        print("\n📊 Results:")
        print(f"Standard endpoint: {standard_time:.2f}s (short text)")
        print(f"Full-text endpoint: {fulltext_time:.2f}s (medium text)")

        # A timing is only meaningful if the request actually succeeded, so
        # surface error statuses instead of silently reporting success
        succeeded = True
        for label, response in (("Standard", response1), ("Full-text", response2)):
            if response.status_code != 200:
                print(f"❌ {label} endpoint failed with status {response.status_code}")
                succeeded = False

        if response2.status_code == 200:
            chunks = response2.headers.get('X-Chunks-Processed', 'unknown')
            print(f"Full-text chunks processed: {chunks}")

        return succeeded

    except Exception as e:
        # Demo boundary: report the failure rather than crash the script
        print(f"❌ Comparison error: {e}")
        return False


def main():
    """Run the demonstration end to end.

    If no ``.env`` file exists in the current working directory, a sample
    file with placeholder endpoint URLs is written there and the function
    returns without running any demo. Otherwise both demos are run and a
    closing summary that reflects their results is printed.
    """
    print("🚀 Enhanced Chatterbox TTS API Demonstration")
    print("This demo showcases the new full-text processing capabilities")
    print()

    # Check if .env file exists
    env_file = Path(".env")
    if not env_file.exists():
        print("📝 Creating sample .env file...")
        print("Please update it with your actual Modal endpoint URLs")

        env_content = """# Enhanced Chatterbox TTS API Endpoints
FULL_TEXT_TTS_ENDPOINT=https://YOUR-MODAL-ENDPOINT.modal.run/generate_full_text_audio
GENERATE_AUDIO_ENDPOINT=https://YOUR-MODAL-ENDPOINT.modal.run/generate_audio
"""
        env_file.write_text(env_content)
        print("✅ Sample .env file created")
        return

    # Run demonstrations; each returns True on success and False on failure
    full_text_ok = demo_full_text_processing()
    comparison_ok = demo_comparison()

    if full_text_ok and comparison_ok:
        print("\n🎉 Demo complete!")
    else:
        print("\n⚠️ Demo finished with errors - see the messages above")

    if full_text_ok:
        # Only the full-text demo writes audio into demo_output/
        print("Check the demo_output/ directory for generated audio files")


# Run the demo only when this file is executed as a script, not when imported
if __name__ == "__main__":
    main()