#!/usr/bin/env python3
"""

Patch TranscriptorAI for HuggingFace Spaces deployment

Fixes timeout issues by using HF API instead of local models

"""

import os
import sys

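# Minimal guard for patch_config's string replacements below: they assume
# config.py still contains its stock default assignments, so a missing
# pattern is reported instead of being skipped silently.
def _replace_or_warn(content, old, new):
    """Return `content` with `old` replaced by `new`, warning if `old` is absent."""
    if old not in content:
        print(f"⚠ Pattern not found, skipping: {old[:60]}...")
        return content
    return content.replace(old, new)
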
def patch_config():
    """Patch config.py for Spaces"""
    config_path = "config.py"

    with open(config_path, 'r') as f:
        content = f.read()

    # Force HF API backend
    content = _replace_or_warn(
        content,
        'LLM_BACKEND = os.getenv("LLM_BACKEND", "hf_api")',
        'LLM_BACKEND = "hf_api"  # Forced for HF Spaces'
    )

    # Use a lighter model
    content = _replace_or_warn(
        content,
        'HF_MODEL = os.getenv("HF_MODEL", "mistralai/Mixtral-8x7B-Instruct-v0.1")',
        'HF_MODEL = "mistralai/Mistral-7B-Instruct-v0.2"  # Lighter for Spaces'
    )

    # Reduce timeouts
    content = _replace_or_warn(
        content,
        'LLM_TIMEOUT = int(os.getenv("LLM_TIMEOUT", "120"))',
        'LLM_TIMEOUT = 25  # Spaces timeout limit'
    )

    # Reduce tokens per request
    content = _replace_or_warn(
        content,
        'MAX_TOKENS_PER_REQUEST = int(os.getenv("MAX_TOKENS_PER_REQUEST", "300"))',
        'MAX_TOKENS_PER_REQUEST = 100  # Faster for Spaces'
    )

    # Reduce chunk size
    content = _replace_or_warn(
        content,
        'MAX_CHUNK_TOKENS = int(os.getenv("MAX_CHUNK_TOKENS", "6000"))',
        'MAX_CHUNK_TOKENS = 2000  # Lighter for Spaces'
    )

    with open(config_path, 'w') as f:
        f.write(content)

    print("βœ“ Patched config.py for HF Spaces")

def patch_app():
    """Patch app.py for Spaces"""
    app_path = "app.py"

    with open(app_path, 'r') as f:
        lines = f.readlines()

    # Add Spaces configuration at top
    spaces_config = '''# HuggingFace Spaces Configuration
import os
os.environ["LLM_BACKEND"] = "hf_api"
os.environ["LLM_TIMEOUT"] = "25"
os.environ["MAX_TOKENS_PER_REQUEST"] = "100"
print("🚀 Running on HuggingFace Spaces - Optimized Configuration Loaded")

'''
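    # NB: os.environ values must be strings ("25", not 25).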

    # Insert the config block just after the top-of-file import section
    # (track contiguous top-level import/from lines, stopping at the first
    # blank line after them). config.py is also patched directly, since
    # modules imported before this block runs would not see these env vars.
    import_end = 0
    for i, line in enumerate(lines):
        if line.startswith('import') or line.startswith('from'):
            import_end = i + 1
        elif import_end > 0 and not line.strip():
            break

    lines.insert(import_end + 1, spaces_config)

    # Replace the first .launch() call with a queued launch, preserving
    # whatever variable name the app uses for its Blocks/Interface object
    for i, line in enumerate(lines):
        if '.launch()' in line:
            app_obj = line.split('.launch()')[0].strip() or 'demo'
            lines[i] = f'''{app_obj}.queue(
    max_size=10,
    api_open=False
).launch(
    server_name="0.0.0.0",
    server_port=7860,
    show_error=True
)
'''
            break

    with open(app_path, 'w') as f:
        f.writelines(lines)

    print("βœ“ Patched app.py for HF Spaces")

def create_spaces_requirements():
    """Create lightweight requirements.txt for Spaces"""
    requirements = '''# TranscriptorAI - HF Spaces Dependencies
gradio>=4.0.0
huggingface_hub>=0.19.0
python-docx>=1.0.0
pdfplumber>=0.10.0
pandas>=2.0.0
reportlab>=4.0.0
tiktoken>=0.5.0
nltk>=3.8.0
scikit-learn>=1.3.0

# Do NOT install these on Spaces (use API instead):
# transformers
# torch
# torchaudio
'''

    with open('requirements.txt', 'w') as f:
        f.write(requirements)

    print("βœ“ Created lightweight requirements.txt")

def create_spaces_readme():
    """Create README for Spaces"""
    readme = '''---
title: TranscriptorAI Enhanced
emoji: 📝
colorFrom: blue
colorTo: green
sdk: gradio
sdk_version: 4.0.0
app_file: app.py
pinned: false
license: mit
hardware: cpu-basic
---

# TranscriptorAI Enhanced - HuggingFace Spaces Edition

Enterprise-grade transcript analysis with AI-powered insights.

## ⚠️ Important Notes for Spaces Users

1. **Process 1-3 transcripts at a time** to avoid timeouts
2. **Set your HuggingFace token** in Space secrets:
   - Go to Settings → Repository secrets
   - Add: `HUGGINGFACE_TOKEN` = your token
   - Get token at: https://huggingface.co/settings/tokens
3. **Expected processing time**: 30-60 seconds per transcript

## Usage

1. Upload 1-3 transcript files (.txt, .docx, or .pdf)
2. Select interviewee type (HCP/Patient/Other)
3. Click "Analyze"
4. Wait 30-60 seconds
5. Download CSV and PDF reports

## Features

- ✅ Automated transcript analysis
- ✅ Structured data extraction
- ✅ Quality scoring
- ✅ Cross-transcript synthesis
- ✅ PDF/CSV/HTML reports
- ✅ Data tables and visualizations

## Optimizations for Spaces

- Uses HuggingFace Inference API (no local model loading)
- Lightweight Mistral-7B model
- Reduced token requirements
- Aggressive timeout protection
- Queue system for stability

For more information, visit: [GitHub Repository](#)
'''

    with open('README.md', 'w') as f:
        f.write(readme)

    print("βœ“ Created Spaces-optimized README.md")

def main():
    print("=" * 70)
    print("  Patching TranscriptorAI for HuggingFace Spaces")
    print("=" * 70)
    print()

    try:
        patch_config()
        patch_app()
        create_spaces_requirements()
        create_spaces_readme()

        print()
        print("=" * 70)
        print("βœ… PATCHING COMPLETE")
        print("=" * 70)
        print()
        print("NEXT STEPS:")
        print("1. Push code to your HuggingFace Space")
        print("2. In Space settings, add secret:")
        print("   Name: HUGGINGFACE_TOKEN")
        print("   Value: <your HF token>")
        print("3. (Optional) Upgrade hardware to 'cpu-upgrade' for better timeout limits")
        print()
        print("The app will now:")
        print("  βœ“ Use HF API (no local model loading)")
        print("  βœ“ Process with 25s timeout (under Spaces limit)")
        print("  βœ“ Use lightweight Mistral-7B model")
        print("  βœ“ Queue requests to prevent crashes")
        print()

    except Exception as e:
        print(f"βœ— Error during patching: {e}")
        sys.exit(1)

if __name__ == "__main__":
    main()