#!/usr/bin/env python3
"""
Patch TranscriptorAI for HuggingFace Spaces deployment
Fixes timeout issues by using HF API instead of local models
"""
import os
import sys
def patch_config():
    """Rewrite config.py in place so it runs within HF Spaces limits.

    Forces the HF Inference API backend, swaps in a lighter model, and
    shrinks the timeout/token/chunk budgets.  Each edit is an exact-string
    substitution against the expected default line; a line that has
    already been patched (or was hand-edited) is silently left untouched.
    """
    config_path = "config.py"
    # (expected default line, Spaces-friendly replacement) pairs.
    replacements = [
        # Force HF API backend
        ('LLM_BACKEND = os.getenv("LLM_BACKEND", "hf_api")',
         'LLM_BACKEND = "hf_api" # Forced for HF Spaces'),
        # Use lighter model
        ('HF_MODEL = os.getenv("HF_MODEL", "mistralai/Mixtral-8x7B-Instruct-v0.1")',
         'HF_MODEL = "mistralai/Mistral-7B-Instruct-v0.2" # Lighter for Spaces'),
        # Reduce timeouts
        ('LLM_TIMEOUT = int(os.getenv("LLM_TIMEOUT", "120"))',
         'LLM_TIMEOUT = 25 # Spaces timeout limit'),
        # Reduce tokens
        ('MAX_TOKENS_PER_REQUEST = int(os.getenv("MAX_TOKENS_PER_REQUEST", "300"))',
         'MAX_TOKENS_PER_REQUEST = 100 # Faster for Spaces'),
        # Reduce chunk size
        ('MAX_CHUNK_TOKENS = int(os.getenv("MAX_CHUNK_TOKENS", "6000"))',
         'MAX_CHUNK_TOKENS = 2000 # Lighter for Spaces'),
    ]
    with open(config_path, 'r') as f:
        content = f.read()
    for old, new in replacements:
        content = content.replace(old, new)
    with open(config_path, 'w') as f:
        f.write(content)
    print("β Patched config.py for HF Spaces")
def patch_app():
    """Rewrite app.py in place for HF Spaces.

    Injects environment overrides (HF API backend, short timeout, small
    token budget) just after the leading import block, and swaps the
    first bare ``.launch()`` call for a queued launch bound to
    0.0.0.0:7860.
    """
    app_path = "app.py"
    with open(app_path, 'r') as f:
        lines = f.readlines()
    # Overrides are written into the app itself so they apply even when
    # Space secrets/variables are not configured.
    spaces_config = '''# HuggingFace Spaces Configuration
import os
os.environ["LLM_BACKEND"] = "hf_api"
os.environ["LLM_TIMEOUT"] = "25"
os.environ["MAX_TOKENS_PER_REQUEST"] = "100"
print("π Running on HuggingFace Spaces - Optimized Configuration Loaded")
'''
    # Locate the end of the leading import block: advance past
    # import/from lines, stop at the first blank line after them.
    import_end = 0
    for i, line in enumerate(lines):
        if line.startswith('import') or line.startswith('from'):
            import_end = i + 1
        elif import_end > 0 and not line.strip():
            break
    lines.insert(import_end + 1, spaces_config)
    # Replace the first .launch() call with a queued launch.
    # ('demo.launch()' necessarily contains '.launch()', so one test
    # suffices.)  NOTE(review): the replacement hard-codes the
    # conventional ``demo`` variable name; an app whose Blocks object
    # has another name needs a manual edit — confirm against app.py.
    for i, line in enumerate(lines):
        if '.launch()' in line:
            lines[i] = '''demo.queue(
    max_size=10,
    api_open=False
).launch(
    server_name="0.0.0.0",
    server_port=7860,
    show_error=True
)
'''
            break
    with open(app_path, 'w') as f:
        f.writelines(lines)
    print("β Patched app.py for HF Spaces")
def create_spaces_requirements():
    """Write a requirements.txt sized for the Spaces container."""
    # Heavy local-inference packages (transformers/torch) are left out on
    # purpose: the Space talks to the HF Inference API instead of loading
    # models locally.
    spec = '''# TranscriptorAI - HF Spaces Dependencies
gradio>=4.0.0
huggingface_hub>=0.19.0
python-docx>=1.0.0
pdfplumber>=0.10.0
pandas>=2.0.0
reportlab>=4.0.0
tiktoken>=0.5.0
nltk>=3.8.0
scikit-learn>=1.3.0
# Do NOT install these on Spaces (use API instead):
# transformers
# torch
# torchaudio
'''
    with open('requirements.txt', 'w') as out:
        out.write(spec)
    print("β Created lightweight requirements.txt")
def create_spaces_readme():
    """Write the Spaces README.md (YAML front matter + usage notes)."""
    # The leading '---' block is the Spaces configuration header; the
    # rest is user-facing documentation.
    readme_text = '''---
title: TranscriptorAI Enhanced
emoji: π
colorFrom: blue
colorTo: green
sdk: gradio
sdk_version: 4.0.0
app_file: app.py
pinned: false
license: mit
hardware: cpu-basic
---
# TranscriptorAI Enhanced - HuggingFace Spaces Edition
Enterprise-grade transcript analysis with AI-powered insights.
## β οΈ Important Notes for Spaces Users
1. **Process 1-3 transcripts at a time** to avoid timeouts
2. **Set your HuggingFace token** in Space secrets:
- Go to Settings β Repository secrets
- Add: `HUGGINGFACE_TOKEN` = your token
- Get token at: https://huggingface.co/settings/tokens
3. **Expected processing time**: 30-60 seconds per transcript
## Usage
1. Upload 1-3 transcript files (.txt, .docx, or .pdf)
2. Select interviewee type (HCP/Patient/Other)
3. Click "Analyze"
4. Wait 30-60 seconds
5. Download CSV and PDF reports
## Features
- β
Automated transcript analysis
- β
Structured data extraction
- β
Quality scoring
- β
Cross-transcript synthesis
- β
PDF/CSV/HTML reports
- β
Data tables and visualizations
## Optimizations for Spaces
- Uses HuggingFace Inference API (no local model loading)
- Lightweight Mistral-7B model
- Reduced token requirements
- Aggressive timeout protection
- Queue system for stability
For more information, visit: [GitHub Repository](#)
'''
    with open('README.md', 'w') as out:
        out.write(readme_text)
    print("β Created Spaces-optimized README.md")
def main():
    """Run every Spaces patch step; exit with status 1 on any failure."""
    sep = "=" * 70
    print(sep)
    print(" Patching TranscriptorAI for HuggingFace Spaces")
    print(sep)
    print()
    try:
        # Apply each patch step in order.
        for step in (patch_config, patch_app,
                     create_spaces_requirements, create_spaces_readme):
            step()
        print()
        print(sep)
        # NOTE(review): this literal was split across lines by encoding
        # garbling in the source; reconstructed as a single line.
        print("β PATCHING COMPLETE")
        print(sep)
        print()
        print("NEXT STEPS:")
        print("1. Push code to your HuggingFace Space")
        print("2. In Space settings, add secret:")
        print(" Name: HUGGINGFACE_TOKEN")
        print(" Value: <your HF token>")
        print("3. (Optional) Upgrade hardware to 'cpu-upgrade' for better timeout limits")
        print()
        print("The app will now:")
        print(" β Use HF API (no local model loading)")
        print(" β Process with 25s timeout (under Spaces limit)")
        print(" β Use lightweight Mistral-7B model")
        print(" β Queue requests to prevent crashes")
        print()
    except Exception as e:
        print(f"β Error during patching: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()