alex4cip committed
Commit c7bf517 · 1 Parent(s): 476edda

test: Deploy minimal echo version to debug 500 errors


Temporarily replace app.py with a minimal version:
- No AI models
- No transformers/torch dependencies
- Simple echo functionality
- Pure Gradio Blocks with queue=False (see the sketch below)
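
On that last bullet: per the comments in the code below, the queue is avoided in two places at once, by never calling `demo.queue()` and by passing `queue=False` to every event listener. A minimal sketch of that wiring (illustrative only, mirroring the new app.py rather than documenting Gradio internals):

```python
import gradio as gr

with gr.Blocks() as demo:
    chat = gr.Chatbot(type="tuples")
    msg = gr.Textbox(show_label=False)

    # queue=False keeps this single event off the queue/SSE code path,
    # the suspected source of the 500 errors.
    msg.submit(lambda m, h: (h + [[m, f"Echo: {m}"]], ""),
               [msg, chat], [chat, msg], queue=False)

# demo.queue() is deliberately never called before launch().
demo.launch()
```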

This will help isolate whether the issue is:
1. HF Spaces infrastructure
2. Model loading
3. Gradio configuration
4. Python dependencies

If this works, we can incrementally add features back.
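
One way to stage that (a sketch only; the `ENABLE_MODELS` flag and the stubbed `respond` below are hypothetical, not part of this commit) is to gate the heavy dependencies behind an environment variable, so each redeploy re-enables exactly one suspect from the list above:

```python
import os
import gradio as gr

# Hypothetical toggle: set ENABLE_MODELS=1 as a Space variable for the next step.
ENABLE_MODELS = os.getenv("ENABLE_MODELS", "0") == "1"

if ENABLE_MODELS:
    # Reintroduce transformers/torch only behind the flag, so an import-time
    # or model-loading failure shows up unambiguously in the Space logs.
    import torch  # noqa: F401
    from transformers import AutoModelForCausalLM, AutoTokenizer  # noqa: F401

def respond(message, history):
    """Echo by default; swap in the model-backed path once echo is confirmed."""
    if not message or not message.strip():
        return history
    if not ENABLE_MODELS:
        return history + [[message, f"Echo: {message}"]]
    return history + [[message, "(model path not re-enabled yet)"]]
```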

Files changed (1)
app.py  +23  -387
app.py CHANGED
@@ -1,398 +1,34 @@
 """
-Hugging Face LLM Chatbot with Gradio
-Using transformers library to run models locally
+Minimal test version for HF Spaces debugging
+No AI models, just echo functionality
 """

-import os
 import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer
-import torch

-# Get HF token from environment (Spaces uses Secrets, local uses .env)
-HF_TOKEN = os.getenv("HF_TOKEN", None)
-
-# Check device
-device = "cuda" if torch.cuda.is_available() else "cpu"
-print(f"Using device: {device}")
-
-# Available models (optimized for local execution)
-MODELS = {
-    "microsoft/DialoGPT-small": {
-        "name": "DialoGPT Small (영어, 빠름)",
-        "max_length": 80,
-        "language": "en",
-    },
-    "microsoft/DialoGPT-medium": {
-        "name": "DialoGPT Medium (영어, 고품질)",
-        "max_length": 100,
-        "language": "en",
-    },
-    "gpt2": {
-        "name": "GPT-2 (영어, 범용)",
-        "max_length": 80,
-        "language": "en",
-    },
-    "beomi/llama-2-ko-7b": {
-        "name": "Llama-2-Ko 7B (한글 대화형, ⚠️ 14GB+ RAM 필요)",
-        "max_length": 150,
-        "language": "ko",
-        "warning": "이 모델은 14GB 이상의 메모리가 필요합니다. HF Spaces 무료 tier에서는 메모리 부족으로 실행되지 않을 수 있습니다.",
-    },
-    "kyujinpy/KoT-Llama2-7B-Chat": {
-        "name": "KoT-Llama2-7B-Chat (한글 대화, ⚠️ 14GB+ RAM 필요)",
-        "max_length": 150,
-        "language": "ko",
-        "warning": "이 모델은 14GB 이상의 메모리가 필요합니다. HF Spaces 무료 tier에서는 메모리 부족으로 실행되지 않을 수 있습니다.",
-    },
-    "beomi/KoAlpaca-Polyglot-5.8B": {
-        "name": "KoAlpaca 5.8B (한글 대화형, ⚠️ 12GB+ RAM 필요)",
-        "max_length": 150,
-        "language": "ko",
-        "warning": "이 모델은 12GB 이상의 메모리가 필요합니다. HF Spaces 무료 tier에서는 메모리 부족으로 실행되지 않을 수 있습니다.",
-    },
-    "nlpai-lab/kullm-polyglot-5.8b-v2": {
-        "name": "KULLM-Polyglot 5.8B (한글 대화, ⚠️ 12GB+ RAM 필요)",
-        "max_length": 150,
-        "language": "ko",
-        "warning": "이 모델은 12GB 이상의 메모리가 필요합니다. HF Spaces 무료 tier에서는 메모리 부족으로 실행되지 않을 수 있습니다.",
-    },
-}
-
-# Model cache
-loaded_models = {}
-loaded_tokenizers = {}
-
-
-def load_model(model_name):
-    """Load model and tokenizer"""
-    if model_name not in loaded_models:
-        try:
-            print(f"Loading model: {model_name}")
-
-            # Load tokenizer
-            tokenizer = AutoTokenizer.from_pretrained(
-                model_name,
-                token=HF_TOKEN,
-                padding_side='left',
-                trust_remote_code=True
-            )
-
-            # Add pad token if missing
-            if tokenizer.pad_token is None:
-                tokenizer.pad_token = tokenizer.eos_token
-
-            # Load model with safetensors support
-            # Note: torch_dtype is deprecated but some models don't support dtype yet
-            import warnings
-            warnings.filterwarnings('ignore', message='.*torch_dtype.*deprecated.*')
-
-            try:
-                model = AutoModelForCausalLM.from_pretrained(
-                    model_name,
-                    token=HF_TOKEN,
-                    torch_dtype=torch.float32,
-                    low_cpu_mem_usage=True,
-                    trust_remote_code=True,
-                    use_safetensors=True
-                )
-            except Exception as e:
-                # Fallback to default loading if safetensors fails
-                print(f"⚠️ Safetensors loading failed, trying default method: {e}")
-                model = AutoModelForCausalLM.from_pretrained(
-                    model_name,
-                    token=HF_TOKEN,
-                    torch_dtype=torch.float32,
-                    low_cpu_mem_usage=True,
-                    trust_remote_code=True
-                )
-
-            model.to(device)
-            model.eval()
-
-            loaded_models[model_name] = model
-            loaded_tokenizers[model_name] = tokenizer
-
-            print(f"✅ Model {model_name} loaded successfully")
-
-        except Exception as e:
-            print(f"❌ Failed to load model {model_name}: {e}")
-            return None, None
-
-    return loaded_models.get(model_name), loaded_tokenizers.get(model_name)
-
-
-def chat_response(message, history, model_name):
-    """
-    Generate chatbot response - Returns updated history (for Blocks)
-
-    Args:
-        message: User input
-        history: Chat history as list of [user_msg, bot_msg] pairs
-        model_name: Selected model
-
-    Returns:
-        Updated history list
-    """
+def echo_chat(message, history):
+    """Simple echo without any AI"""
     if not message or not message.strip():
         return history
-
-    try:
-        # Load model and tokenizer
-        model, tokenizer = load_model(model_name)
-
-        if model is None or tokenizer is None:
-            return history + [[message, f"❌ 모델 '{model_name}'을 로드할 수 없습니다."]]
-
-        model_config = MODELS[model_name]
-
-        # Build conversation context from history
-        conversation = ""
-        for user_msg, bot_msg in history:
-            if user_msg:
-                conversation += f"{user_msg}\n"
-            if bot_msg:
-                conversation += f"{bot_msg}\n"
-
-        # Add current message
-        conversation += f"{message}\n"
-
-        # Tokenize
-        inputs = tokenizer.encode(conversation, return_tensors="pt").to(device)
-
-        # Generate response
-        with torch.no_grad():
-            outputs = model.generate(
-                inputs,
-                max_new_tokens=model_config["max_length"],
-                temperature=0.9,
-                do_sample=True,
-                pad_token_id=tokenizer.pad_token_id,
-                eos_token_id=tokenizer.eos_token_id,
-            )
-
-        # Decode response
-        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-        # Remove the input prompt from response
-        response = response[len(conversation):].strip()
-
-        # If empty, return a default message
-        if not response:
-            response = "I understand. Could you tell me more?"
-
-        return history + [[message, response]]
-
-    except Exception as e:
-        import traceback
-        error_msg = str(e)
-        error_type = type(e).__name__
-
-        print("=" * 50)
-        print(f"Error Type: {error_type}")
-        print(f"Error Message: {error_msg}")
-        print(f"Traceback:\n{traceback.format_exc()}")
-        print("=" * 50)
-
-        if "out of memory" in error_msg.lower() or "oom" in error_msg.lower():
-            return history + [[message, "❌ 메모리 부족. 더 작은 모델을 선택하거나 앱을 재시작하세요."]]
-        elif "cuda" in error_msg.lower() and device == "cpu":
-            return history + [[message, "⚠️ GPU 없이 CPU로 실행 중입니다. 응답이 느릴 수 있습니다."]]
-        else:
-            return history + [[message, f"❌ 오류: {error_type}\n{error_msg[:200]}"]]
-
-
-# Global state
-current_model = "microsoft/DialoGPT-small"
-
-# DO NOT preload model - load on first use to avoid startup memory issues
-print("✅ App initialized - models will be loaded on first use")
-
-# Create Gradio interface
-with gr.Blocks(
-    title="🤖 Hugging Face Chatbot",
-    theme=gr.themes.Soft(),
-    css="""
-    /* Make input textbox more visible */
-    .chatbot-input textarea {
-        border: 2px solid #2563eb !important;
-        border-radius: 8px !important;
-        background-color: #f8fafc !important;
-        font-size: 16px !important;
-        padding: 12px !important;
-        box-shadow: 0 2px 4px rgba(37, 99, 235, 0.1) !important;
-    }
-    .chatbot-input textarea:focus {
-        border-color: #1d4ed8 !important;
-        background-color: #ffffff !important;
-        box-shadow: 0 4px 8px rgba(37, 99, 235, 0.2) !important;
-        outline: none !important;
-    }
-    .chatbot-input textarea::placeholder {
-        color: #64748b !important;
-        font-style: italic !important;
-    }
-    """
-) as demo:
-    gr.Markdown(
-        """
-        # 🤖 Hugging Face LLM Chatbot
-
-        **로컬 모델 실행 방식** - API 제한 없음!
-
-        **사용 방법:**
-        1. 모델을 선택하세요 (처음에는 로딩 시간 필요)
-        2. 메시지를 입력하고 대화하세요
-        3. CPU에서 실행되므로 응답이 조금 느릴 수 있습니다
-
-        **언어별 추천 모델:**
-        - 🇬🇧 영어: DialoGPT, GPT-2
-        - 🇰🇷 한글: KoGPT-2, KoAlpaca (5.8B는 큰 모델, 느림)
-
-        **장점:** API 제한 없음, 완전 무료, 오프라인 작동 가능
-        """
-    )
-
-    # Model selector
-    model_dropdown = gr.Dropdown(
-        choices=[(config["name"], model_id) for model_id, config in MODELS.items()],
-        value="microsoft/DialoGPT-small",
-        label="🎯 모델 선택",
-        info="모델을 변경하면 새 모델을 다운로드합니다 (처음 한 번만)",
-    )
-
-    # Warning message for model requirements
-    model_warning = gr.Markdown("", visible=False)
-
-    # Chat interface using pure Blocks (NO ChatInterface to avoid SSE issues)
-    chatbot_display = gr.Chatbot(
-        height=500,
-        label="💬 대화",
-        show_label=False,
-        type="tuples",  # Use tuple format [[user_msg, bot_msg], ...]
-    )
-
+    return history + [[message, f"Echo: {message}"]]
+
+# Minimal Blocks interface
+with gr.Blocks(title="Test Chatbot") as demo:
+    gr.Markdown("# 🤖 Minimal Test Chatbot")
+
+    chatbot = gr.Chatbot(height=400, type="tuples")
+
     with gr.Row():
-        msg_input = gr.Textbox(
-            placeholder="💬 메시지를 입력하세요 (영어 권장)...",
-            show_label=False,
-            scale=9,
-            autofocus=True,
-            elem_classes="chatbot-input",
-        )
-        submit_btn = gr.Button("전송", scale=1, variant="primary")
-
-    clear_btn = gr.Button("🗑️ 대화 초기화", size="sm")
-
-    # Message submission handler
-    def submit_message(message, history, model):
-        updated_history = chat_response(message, history, model)
-        return updated_history, ""  # Return updated history and clear input
-
-    # Button click event (NO queue - explicitly disabled)
-    submit_btn.click(
-        fn=submit_message,
-        inputs=[msg_input, chatbot_display, model_dropdown],
-        outputs=[chatbot_display, msg_input],
-        queue=False,  # Explicitly disable queue for this event
-    )
-
-    # Enter key event
-    msg_input.submit(
-        fn=submit_message,
-        inputs=[msg_input, chatbot_display, model_dropdown],
-        outputs=[chatbot_display, msg_input],
-        queue=False,  # Explicitly disable queue for this event
-    )
-
-    # Clear button
-    clear_btn.click(
-        fn=lambda: [],
-        outputs=chatbot_display,
-        queue=False,  # Explicitly disable queue for this event
-    )
-
-    # Examples section with model switching
-    gr.Markdown("### 💡 예제 (클릭하면 모델과 메시지가 자동으로 설정됩니다)")
-
-    with gr.Row():
-        with gr.Column(scale=1):
-            gr.Markdown("**영어 예제:**")
-            example_btn_1 = gr.Button("👋 Hello! How are you?", size="sm")
-            example_btn_2 = gr.Button("😄 Tell me a joke", size="sm")
-
-        with gr.Column(scale=1):
-            gr.Markdown("**한글 예제:**")
-            example_btn_3 = gr.Button("🌤️ 안녕하세요! 오늘 날씨가 어때요?", size="sm")
-            example_btn_4 = gr.Button("🤖 인공지능에 대해 간단히 설명해주세요.", size="sm")
-
-    # Example button click handlers
-    def set_example_1():
-        return "microsoft/DialoGPT-small", "Hello! How are you?"
-
-    def set_example_2():
-        return "microsoft/DialoGPT-medium", "Tell me a joke"
-
-    def set_example_3():
-        return "beomi/llama-2-ko-7b", "안녕하세요! 오늘 날씨가 어때요?"
-
-    def set_example_4():
-        return "kyujinpy/KoT-Llama2-7B-Chat", "인공지능에 대해 간단히 설명해주세요."
-
-    example_btn_1.click(set_example_1, outputs=[model_dropdown, msg_input], queue=False)
-    example_btn_2.click(set_example_2, outputs=[model_dropdown, msg_input], queue=False)
-    example_btn_3.click(set_example_3, outputs=[model_dropdown, msg_input], queue=False)
-    example_btn_4.click(set_example_4, outputs=[model_dropdown, msg_input], queue=False)
-
-    # Show warning and clear chat when model changes
-    def on_model_change(new_model):
-        global current_model
-        current_model = new_model
-
-        # Check if model has warning
-        warning_text = ""
-        warning_visible = False
-        if "warning" in MODELS[new_model]:
-            warning_text = f"⚠️ **경고**: {MODELS[new_model]['warning']}"
-            warning_visible = True
-
-        # Preload new model
-        load_model(new_model)
-
-        # Return: empty chat history, warning text, warning visibility
-        return [], warning_text, gr.update(visible=warning_visible)
-
-    model_dropdown.change(
-        fn=on_model_change,
-        inputs=[model_dropdown],
-        outputs=[chatbot_display, model_warning, model_warning],
-        queue=False,  # Explicitly disable queue for model change
-    )
-
-    gr.Markdown(
-        """
-        ---
-
-        **⚠️ 참고:**
-        - 모델은 로컬에서 실행됩니다 (첫 실행 시 다운로드)
-        - CPU에서 실행되므로 GPU보다 느립니다
-        - 각 모델은 특정 언어에 최적화되어 있습니다
-
-        **💾 디스크 사용량:**
-        - DialoGPT-small: ~350MB
-        - DialoGPT-medium: ~800MB
-        - GPT-2: ~500MB
-        - KoGPT-2: ~500MB
-        - KoAlpaca-5.8B: ~12GB (큰 모델, 메모리 8GB+ 필요)
-
-        **💡 팁:**
-        - 영어 대화는 DialoGPT 추천
-        - 한글 대화는 KoGPT-2 추천 (KoAlpaca는 리소스 충분할 때만)
-        - 짧은 문장으로 대화하면 더 나은 결과
-        - 모델이 한 번 로드되면 다시 다운로드하지 않습니다
-        """
-    )
+        msg = gr.Textbox(placeholder="메시지 입력...", show_label=False, scale=9)
+        btn = gr.Button("전송", scale=1)
+
+    clear = gr.Button("초기화")
+
+    def submit(message, history):
+        return echo_chat(message, history), ""
+
+    btn.click(submit, [msg, chatbot], [chatbot, msg], queue=False)
+    msg.submit(submit, [msg, chatbot], [chatbot, msg], queue=False)
+    clear.click(lambda: [], outputs=chatbot, queue=False)

 if __name__ == "__main__":
-    # Do not invoke the queue system (prevents HTTP/2 SSE errors)
-    # In Gradio 5.x, the queue stays disabled unless .queue() is called
     demo.launch()
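
The echo path can also be sanity-checked locally without launching the UI (a hypothetical snippet, assuming the new app.py is importable as `app`):

```python
# Smoke test for the echo logic; not part of this commit.
from app import echo_chat

assert echo_chat("", []) == []                      # blank input: history unchanged
assert echo_chat("  ", []) == []                    # whitespace-only: unchanged
assert echo_chat("hi", []) == [["hi", "Echo: hi"]]  # normal input: echoed pair
print("echo_chat OK")
```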