|
|
| """
|
| Patch TranscriptorAI for HuggingFace Spaces deployment
|
| Fixes timeout issues by using HF API instead of local models
|
| """
|
|
|
| import os
|
| import sys
|
|
|
def patch_config():
    """Rewrite ``config.py`` in place with Spaces-safe settings.

    Forces the HF Inference API backend, a lighter model, and tighter
    timeout/token limits so requests stay under the HF Spaces limit.

    Raises:
        OSError: if ``config.py`` cannot be read or written.
    """
    config_path = "config.py"

    # (original line, Spaces override) pairs; matching is exact-text.
    replacements = [
        ('LLM_BACKEND = os.getenv("LLM_BACKEND", "hf_api")',
         'LLM_BACKEND = "hf_api" # Forced for HF Spaces'),
        ('HF_MODEL = os.getenv("HF_MODEL", "mistralai/Mixtral-8x7B-Instruct-v0.1")',
         'HF_MODEL = "mistralai/Mistral-7B-Instruct-v0.2" # Lighter for Spaces'),
        ('LLM_TIMEOUT = int(os.getenv("LLM_TIMEOUT", "120"))',
         'LLM_TIMEOUT = 25 # Spaces timeout limit'),
        ('MAX_TOKENS_PER_REQUEST = int(os.getenv("MAX_TOKENS_PER_REQUEST", "300"))',
         'MAX_TOKENS_PER_REQUEST = 100 # Faster for Spaces'),
        ('MAX_CHUNK_TOKENS = int(os.getenv("MAX_CHUNK_TOKENS", "6000"))',
         'MAX_CHUNK_TOKENS = 2000 # Lighter for Spaces'),
    ]

    with open(config_path, 'r') as f:
        content = f.read()

    for old, new in replacements:
        if old not in content:
            # str.replace() is a silent no-op when the target is absent;
            # surface that, so a drifted config.py doesn't keep the old
            # (too heavy) settings unnoticed.
            print(f"⚠ Pattern not found in {config_path}: {old!r}")
            continue
        content = content.replace(old, new)

    with open(config_path, 'w') as f:
        f.write(content)

    print("✅ Patched config.py for HF Spaces")
|
|
|
def patch_app():
    """Rewrite ``app.py`` in place for Spaces.

    Prepends the environment overrides to the very top of the file and
    swaps the ``.launch()`` call for a queued, Spaces-friendly one.

    Why the top of the file: ``config.py`` reads ``os.getenv`` at import
    time, so the ``os.environ`` overrides must execute BEFORE app.py's
    import block runs — inserting them after the imports (as a naive
    patch would) has no effect on already-imported config values.
    NOTE(review): this assumes app.py has no shebang/encoding line and no
    module docstring that must stay first — confirm against the repo.

    Raises:
        OSError: if ``app.py`` cannot be read or written.
    """
    app_path = "app.py"

    spaces_config = '''# HuggingFace Spaces Configuration
import os
os.environ["LLM_BACKEND"] = "hf_api"
os.environ["LLM_TIMEOUT"] = "25"
os.environ["MAX_TOKENS_PER_REQUEST"] = "100"
print("🚀 Running on HuggingFace Spaces - Optimized Configuration Loaded")

'''

    with open(app_path, 'r') as f:
        lines = f.readlines()

    # Must precede every import (see docstring).
    lines.insert(0, spaces_config)

    launch_block = (
        'demo.queue(\n'
        '    max_size=10,\n'
        '    api_open=False\n'
        ').launch(\n'
        '    server_name="0.0.0.0",\n'
        '    server_port=7860,\n'
        '    show_error=True\n'
        ')\n'
    )

    for i, line in enumerate(lines):
        # '.launch()' also matches 'demo.launch()', so one check suffices.
        if '.launch()' in line:
            # Preserve the original line's indentation so the patched
            # file stays syntactically valid inside any enclosing block.
            indent = line[:len(line) - len(line.lstrip())]
            lines[i] = ''.join(
                indent + part for part in launch_block.splitlines(keepends=True)
            )
            break

    with open(app_path, 'w') as f:
        f.writelines(lines)

    print("✅ Patched app.py for HF Spaces")
|
|
|
def create_spaces_requirements():
    """Write a lightweight ``requirements.txt`` for Spaces.

    Omits transformers/torch on purpose: the Spaces build uses the HF
    Inference API instead of loading models locally.

    Raises:
        OSError: if ``requirements.txt`` cannot be written.
    """
    requirements = '''# TranscriptorAI - HF Spaces Dependencies
gradio>=4.0.0
huggingface_hub>=0.19.0
python-docx>=1.0.0
pdfplumber>=0.10.0
pandas>=2.0.0
reportlab>=4.0.0
tiktoken>=0.5.0
nltk>=3.8.0
scikit-learn>=1.3.0

# Do NOT install these on Spaces (use API instead):
# transformers
# torch
# torchaudio
'''

    with open('requirements.txt', 'w') as f:
        f.write(requirements)

    print("✅ Created lightweight requirements.txt")
|
|
|
def create_spaces_readme():
    """Write a Spaces-formatted ``README.md`` (YAML front matter + docs).

    The leading ``---`` block is the HF Spaces configuration header
    (sdk, app_file, hardware, ...); the rest is user-facing usage notes.

    Raises:
        OSError: if ``README.md`` cannot be written.
    """
    # NOTE(review): emoji below were reconstructed from mojibake in the
    # original source — confirm the intended glyphs against the repo.
    readme = '''---
title: TranscriptorAI Enhanced
emoji: 🚀
colorFrom: blue
colorTo: green
sdk: gradio
sdk_version: 4.0.0
app_file: app.py
pinned: false
license: mit
hardware: cpu-basic
---

# TranscriptorAI Enhanced - HuggingFace Spaces Edition

Enterprise-grade transcript analysis with AI-powered insights.

## ⚠️ Important Notes for Spaces Users

1. **Process 1-3 transcripts at a time** to avoid timeouts
2. **Set your HuggingFace token** in Space secrets:
   - Go to Settings → Repository secrets
   - Add: `HUGGINGFACE_TOKEN` = your token
   - Get token at: https://huggingface.co/settings/tokens

3. **Expected processing time**: 30-60 seconds per transcript

## Usage

1. Upload 1-3 transcript files (.txt, .docx, or .pdf)
2. Select interviewee type (HCP/Patient/Other)
3. Click "Analyze"
4. Wait 30-60 seconds
5. Download CSV and PDF reports

## Features

- ✅ Automated transcript analysis
- ✅ Structured data extraction
- ✅ Quality scoring
- ✅ Cross-transcript synthesis
- ✅ PDF/CSV/HTML reports
- ✅ Data tables and visualizations

## Optimizations for Spaces

- Uses HuggingFace Inference API (no local model loading)
- Lightweight Mistral-7B model
- Reduced token requirements
- Aggressive timeout protection
- Queue system for stability

For more information, visit: [GitHub Repository](#)
'''

    with open('README.md', 'w') as f:
        f.write(readme)

    print("✅ Created Spaces-optimized README.md")
|
|
|
def main():
    """Run every Spaces patch step, exiting non-zero on any failure."""
    print("=" * 70)
    print(" Patching TranscriptorAI for HuggingFace Spaces")
    print("=" * 70)
    print()

    try:
        patch_config()
        patch_app()
        create_spaces_requirements()
        create_spaces_readme()

        print()
        print("=" * 70)
        print("✅ PATCHING COMPLETE")
        print("=" * 70)
        print()
        print("NEXT STEPS:")
        print("1. Push code to your HuggingFace Space")
        print("2. In Space settings, add secret:")
        print("   Name: HUGGINGFACE_TOKEN")
        print("   Value: <your HF token>")
        print("3. (Optional) Upgrade hardware to 'cpu-upgrade' for better timeout limits")
        print()
        print("The app will now:")
        print("  ✓ Use HF API (no local model loading)")
        print("  ✓ Process with 25s timeout (under Spaces limit)")
        print("  ✓ Use lightweight Mistral-7B model")
        print("  ✓ Queue requests to prevent crashes")
        print()

    # Broad catch is deliberate at this top-level boundary: any failure
    # in a patch step should be reported and turn into a non-zero exit.
    except Exception as e:
        print(f"❌ Error during patching: {e}")
        sys.exit(1)
|
|
|
| if __name__ == "__main__":
|
| main()
|
|
|