alex4cip committed
Commit c7bf517 · 1 Parent(s): 476edda

test: Deploy minimal echo version to debug 500 errors


Temporarily replace app.py with a minimal version:
- No AI models
- No transformers/torch dependencies
- Simple echo functionality
- Pure Gradio Blocks with queue=False (see the sketch below)
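
On that last bullet: per the comments in the code below, the queue is avoided in two places at once, by never calling `demo.queue()` and by passing `queue=False` to every event listener. A minimal sketch of that wiring (illustrative only, mirroring the new app.py rather than documenting Gradio internals):

```python
import gradio as gr

with gr.Blocks() as demo:
    chat = gr.Chatbot(type="tuples")
    msg = gr.Textbox(show_label=False)

    # queue=False keeps this single event off the queue/SSE code path,
    # the suspected source of the 500 errors.
    msg.submit(lambda m, h: (h + [[m, f"Echo: {m}"]], ""),
               [msg, chat], [chat, msg], queue=False)

# demo.queue() is deliberately never called before launch().
demo.launch()
```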

This will help isolate whether the issue is:
1. HF Spaces infrastructure
2. Model loading
3. Gradio configuration
4. Python dependencies

If this works, we can incrementally add features back.
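
One way to stage that (a sketch only; the `ENABLE_MODELS` flag and the stubbed `respond` below are hypothetical, not part of this commit) is to gate the heavy dependencies behind an environment variable, so each redeploy re-enables exactly one suspect from the list above:

```python
import os
import gradio as gr

# Hypothetical toggle: set ENABLE_MODELS=1 as a Space variable for the next step.
ENABLE_MODELS = os.getenv("ENABLE_MODELS", "0") == "1"

if ENABLE_MODELS:
    # Reintroduce transformers/torch only behind the flag, so an import-time
    # or model-loading failure shows up unambiguously in the Space logs.
    import torch  # noqa: F401
    from transformers import AutoModelForCausalLM, AutoTokenizer  # noqa: F401

def respond(message, history):
    """Echo by default; swap in the model-backed path once echo is confirmed."""
    if not message or not message.strip():
        return history
    if not ENABLE_MODELS:
        return history + [[message, f"Echo: {message}"]]
    return history + [[message, "(model path not re-enabled yet)"]]
```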

Files changed (1)
app.py  +23  -387
app.py CHANGED
@@ -1,398 +1,34 @@
 """
-Hugging Face LLM Chatbot with Gradio
-Using transformers library to run models locally
+Minimal test version for HF Spaces debugging
+No AI models, just echo functionality
 """

-import os
 import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer
-import torch

-# Get HF token from environment (Spaces uses Secrets, local uses .env)
-HF_TOKEN = os.getenv("HF_TOKEN", None)
-
-# Check device
-device = "cuda" if torch.cuda.is_available() else "cpu"
-print(f"Using device: {device}")
-
-# Available models (optimized for local execution)
-MODELS = {
-    "microsoft/DialoGPT-small": {
-        "name": "DialoGPT Small (영어, 빠름)",
-        "max_length": 80,
-        "language": "en",
-    },
-    "microsoft/DialoGPT-medium": {
-        "name": "DialoGPT Medium (영어, 고품질)",
-        "max_length": 100,
-        "language": "en",
-    },
-    "gpt2": {
-        "name": "GPT-2 (영어, 범용)",
-        "max_length": 80,
-        "language": "en",
-    },
-    "beomi/llama-2-ko-7b": {
-        "name": "Llama-2-Ko 7B (한글 대화형, ⚠️ 14GB+ RAM 필요)",
-        "max_length": 150,
-        "language": "ko",
-        "warning": "이 모델은 14GB 이상의 메모리가 필요합니다. HF Spaces 무료 tier에서는 메모리 부족으로 실행되지 않을 수 있습니다.",
-    },
-    "kyujinpy/KoT-Llama2-7B-Chat": {
-        "name": "KoT-Llama2-7B-Chat (한글 대화, ⚠️ 14GB+ RAM 필요)",
-        "max_length": 150,
-        "language": "ko",
-        "warning": "이 모델은 14GB 이상의 메모리가 필요합니다. HF Spaces 무료 tier에서는 메모리 부족으로 실행되지 않을 수 있습니다.",
-    },
-    "beomi/KoAlpaca-Polyglot-5.8B": {
-        "name": "KoAlpaca 5.8B (한글 대화형, ⚠️ 12GB+ RAM 필요)",
-        "max_length": 150,
-        "language": "ko",
-        "warning": "이 모델은 12GB 이상의 메모리가 필요합니다. HF Spaces 무료 tier에서는 메모리 부족으로 실행되지 않을 수 있습니다.",
-    },
-    "nlpai-lab/kullm-polyglot-5.8b-v2": {
-        "name": "KULLM-Polyglot 5.8B (한글 대화, ⚠️ 12GB+ RAM 필요)",
-        "max_length": 150,
-        "language": "ko",
-        "warning": "이 모델은 12GB 이상의 메모리가 필요합니다. HF Spaces 무료 tier에서는 메모리 부족으로 실행되지 않을 수 있습니다.",
-    },
-}
-
-# Model cache
-loaded_models = {}
-loaded_tokenizers = {}
-
-
-def load_model(model_name):
-    """Load model and tokenizer"""
-    if model_name not in loaded_models:
-        try:
-            print(f"Loading model: {model_name}")
-
-            # Load tokenizer
-            tokenizer = AutoTokenizer.from_pretrained(
-                model_name,
-                token=HF_TOKEN,
-                padding_side='left',
-                trust_remote_code=True
-            )
-
-            # Add pad token if missing
-            if tokenizer.pad_token is None:
-                tokenizer.pad_token = tokenizer.eos_token
-
-            # Load model with safetensors support
-            # Note: torch_dtype is deprecated but some models don't support dtype yet
-            import warnings
-            warnings.filterwarnings('ignore', message='.*torch_dtype.*deprecated.*')
-
-            try:
-                model = AutoModelForCausalLM.from_pretrained(
-                    model_name,
-                    token=HF_TOKEN,
-                    torch_dtype=torch.float32,
-                    low_cpu_mem_usage=True,
-                    trust_remote_code=True,
-                    use_safetensors=True
-                )
-            except Exception as e:
-                # Fallback to default loading if safetensors fails
-                print(f"⚠️ Safetensors loading failed, trying default method: {e}")
-                model = AutoModelForCausalLM.from_pretrained(
-                    model_name,
-                    token=HF_TOKEN,
-                    torch_dtype=torch.float32,
-                    low_cpu_mem_usage=True,
-                    trust_remote_code=True
-                )
-
-            model.to(device)
-            model.eval()
-
-            loaded_models[model_name] = model
-            loaded_tokenizers[model_name] = tokenizer
-
-            print(f"✅ Model {model_name} loaded successfully")
-
-        except Exception as e:
-            print(f"❌ Failed to load model {model_name}: {e}")
-            return None, None
-
-    return loaded_models.get(model_name), loaded_tokenizers.get(model_name)
-
-
-def chat_response(message, history, model_name):
-    """
-    Generate chatbot response - Returns updated history (for Blocks)
-
-    Args:
-        message: User input
-        history: Chat history as list of [user_msg, bot_msg] pairs
-        model_name: Selected model
-
-    Returns:
-        Updated history list
-    """
+def echo_chat(message, history):
+    """Simple echo without any AI"""
     if not message or not message.strip():
         return history
-
-    try:
-        # Load model and tokenizer
-        model, tokenizer = load_model(model_name)
-
-        if model is None or tokenizer is None:
-            return history + [[message, f"❌ 모델 '{model_name}'을 로드할 수 없습니다."]]
-
-        model_config = MODELS[model_name]
-
-        # Build conversation context from history
-        conversation = ""
-        for user_msg, bot_msg in history:
-            if user_msg:
-                conversation += f"{user_msg}\n"
-            if bot_msg:
-                conversation += f"{bot_msg}\n"
-
-        # Add current message
-        conversation += f"{message}\n"
-
-        # Tokenize
-        inputs = tokenizer.encode(conversation, return_tensors="pt").to(device)
-
-        # Generate response
-        with torch.no_grad():
-            outputs = model.generate(
-                inputs,
-                max_new_tokens=model_config["max_length"],
-                temperature=0.9,
-                do_sample=True,
-                pad_token_id=tokenizer.pad_token_id,
-                eos_token_id=tokenizer.eos_token_id,
-            )
-
-        # Decode response
-        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-        # Remove the input prompt from response
-        response = response[len(conversation):].strip()
-
-        # If empty, return a default message
-        if not response:
-            response = "I understand. Could you tell me more?"
-
-        return history + [[message, response]]
-
-    except Exception as e:
-        import traceback
-        error_msg = str(e)
-        error_type = type(e).__name__
-
-        print("=" * 50)
-        print(f"Error Type: {error_type}")
-        print(f"Error Message: {error_msg}")
-        print(f"Traceback:\n{traceback.format_exc()}")
-        print("=" * 50)
-
-        if "out of memory" in error_msg.lower() or "oom" in error_msg.lower():
-            return history + [[message, "❌ 메모리 부족. 더 작은 모델을 선택하거나 앱을 재시작하세요."]]
-        elif "cuda" in error_msg.lower() and device == "cpu":
-            return history + [[message, "⚠️ GPU 없이 CPU로 실행 중입니다. 응답이 느릴 수 있습니다."]]
-        else:
-            return history + [[message, f"❌ 오류: {error_type}\n{error_msg[:200]}"]]
-
-
-# Global state
-current_model = "microsoft/DialoGPT-small"
-
-# DO NOT preload model - load on first use to avoid startup memory issues
-print("✅ App initialized - models will be loaded on first use")
-
-# Create Gradio interface
-with gr.Blocks(
-    title="🤖 Hugging Face Chatbot",
-    theme=gr.themes.Soft(),
-    css="""
-    /* Make input textbox more visible */
-    .chatbot-input textarea {
-        border: 2px solid #2563eb !important;
-        border-radius: 8px !important;
-        background-color: #f8fafc !important;
-        font-size: 16px !important;
-        padding: 12px !important;
-        box-shadow: 0 2px 4px rgba(37, 99, 235, 0.1) !important;
-    }
-    .chatbot-input textarea:focus {
-        border-color: #1d4ed8 !important;
-        background-color: #ffffff !important;
-        box-shadow: 0 4px 8px rgba(37, 99, 235, 0.2) !important;
-        outline: none !important;
-    }
-    .chatbot-input textarea::placeholder {
-        color: #64748b !important;
-        font-style: italic !important;
-    }
-    """
-) as demo:
-    gr.Markdown(
-        """
-        # 🤖 Hugging Face LLM Chatbot
-
-        **로컬 모델 실행 방식** - API 제한 없음!
-
-        **사용 방법:**
-        1. 모델을 선택하세요 (처음에는 로딩 시간 필요)
-        2. 메시지를 입력하고 대화하세요
-        3. CPU에서 실행되므로 응답이 조금 느릴 수 있습니다
-
-        **언어별 추천 모델:**
-        - 🇬🇧 영어: DialoGPT, GPT-2
-        - 🇰🇷 한글: KoGPT-2, KoAlpaca (5.8B는 큰 모델, 느림)
-
-        **장점:** API 제한 없음, 완전 무료, 오프라인 작동 가능
-        """
-    )
-
-    # Model selector
-    model_dropdown = gr.Dropdown(
-        choices=[(config["name"], model_id) for model_id, config in MODELS.items()],
-        value="microsoft/DialoGPT-small",
-        label="🎯 모델 선택",
-        info="모델을 변경하면 새 모델을 다운로드합니다 (처음 한 번만)",
-    )
-
-    # Warning message for model requirements
-    model_warning = gr.Markdown("", visible=False)
-
-    # Chat interface using pure Blocks (NO ChatInterface to avoid SSE issues)
-    chatbot_display = gr.Chatbot(
-        height=500,
-        label="💬 대화",
-        show_label=False,
-        type="tuples",  # Use tuple format [[user_msg, bot_msg], ...]
-    )
-
+    return history + [[message, f"Echo: {message}"]]
+
+# Minimal Blocks interface
+with gr.Blocks(title="Test Chatbot") as demo:
+    gr.Markdown("# 🤖 Minimal Test Chatbot")
+
+    chatbot = gr.Chatbot(height=400, type="tuples")
+
     with gr.Row():
-        msg_input = gr.Textbox(
-            placeholder="💬 메시지를 입력하세요 (영어 권장)...",
-            show_label=False,
-            scale=9,
-            autofocus=True,
-            elem_classes="chatbot-input",
-        )
-        submit_btn = gr.Button("전송", scale=1, variant="primary")
-
-    clear_btn = gr.Button("🗑️ 대화 초기화", size="sm")
-
-    # Message submission handler
-    def submit_message(message, history, model):
-        updated_history = chat_response(message, history, model)
-        return updated_history, ""  # Return updated history and clear input
-
-    # Button click event (NO queue - explicitly disabled)
-    submit_btn.click(
-        fn=submit_message,
-        inputs=[msg_input, chatbot_display, model_dropdown],
-        outputs=[chatbot_display, msg_input],
-        queue=False,  # Explicitly disable queue for this event
-    )
-
-    # Enter key event
-    msg_input.submit(
-        fn=submit_message,
-        inputs=[msg_input, chatbot_display, model_dropdown],
-        outputs=[chatbot_display, msg_input],
-        queue=False,  # Explicitly disable queue for this event
-    )
-
-    # Clear button
-    clear_btn.click(
-        fn=lambda: [],
-        outputs=chatbot_display,
-        queue=False,  # Explicitly disable queue for this event
-    )
-
-    # Examples section with model switching
-    gr.Markdown("### 💡 예제 (클릭하면 모델과 메시지가 자동으로 설정됩니다)")
-
-    with gr.Row():
-        with gr.Column(scale=1):
-            gr.Markdown("**영어 예제:**")
-            example_btn_1 = gr.Button("👋 Hello! How are you?", size="sm")
-            example_btn_2 = gr.Button("😄 Tell me a joke", size="sm")
-
-        with gr.Column(scale=1):
-            gr.Markdown("**한글 예제:**")
-            example_btn_3 = gr.Button("🌤️ 안녕하세요! 오늘 날씨가 어때요?", size="sm")
-            example_btn_4 = gr.Button("🤖 인공지능에 대해 간단히 설명해주세요.", size="sm")
-
-    # Example button click handlers
-    def set_example_1():
-        return "microsoft/DialoGPT-small", "Hello! How are you?"
-
-    def set_example_2():
-        return "microsoft/DialoGPT-medium", "Tell me a joke"
-
-    def set_example_3():
-        return "beomi/llama-2-ko-7b", "안녕하세요! 오늘 날씨가 어때요?"
-
-    def set_example_4():
-        return "kyujinpy/KoT-Llama2-7B-Chat", "인공지능에 대해 간단히 설명해주세요."
-
-    example_btn_1.click(set_example_1, outputs=[model_dropdown, msg_input], queue=False)
-    example_btn_2.click(set_example_2, outputs=[model_dropdown, msg_input], queue=False)
-    example_btn_3.click(set_example_3, outputs=[model_dropdown, msg_input], queue=False)
-    example_btn_4.click(set_example_4, outputs=[model_dropdown, msg_input], queue=False)
-
-    # Show warning and clear chat when model changes
-    def on_model_change(new_model):
-        global current_model
-        current_model = new_model
-
-        # Check if model has warning
-        warning_text = ""
-        warning_visible = False
-        if "warning" in MODELS[new_model]:
-            warning_text = f"⚠️ **경고**: {MODELS[new_model]['warning']}"
-            warning_visible = True
-
-        # Preload new model
-        load_model(new_model)
-
-        # Return: empty chat history, warning text, warning visibility
-        return [], warning_text, gr.update(visible=warning_visible)
-
-    model_dropdown.change(
-        fn=on_model_change,
-        inputs=[model_dropdown],
-        outputs=[chatbot_display, model_warning, model_warning],
-        queue=False,  # Explicitly disable queue for model change
-    )
-
-    gr.Markdown(
-        """
-        ---
-
-        **⚠️ 참고:**
-        - 모델은 로컬에서 실행됩니다 (첫 실행 시 다운로드)
-        - CPU에서 실행되므로 GPU보다 느립니다
-        - 각 모델은 특정 언어에 최적화되어 있습니다
-
-        **💾 디스크 사용량:**
-        - DialoGPT-small: ~350MB
-        - DialoGPT-medium: ~800MB
-        - GPT-2: ~500MB
-        - KoGPT-2: ~500MB
-        - KoAlpaca-5.8B: ~12GB (큰 모델, 메모리 8GB+ 필요)
-
-        **💡 팁:**
-        - 영어 대화는 DialoGPT 추천
-        - 한글 대화는 KoGPT-2 추천 (KoAlpaca는 리소스 충분할 때만)
-        - 짧은 문장으로 대화하면 더 나은 결과
-        - 모델이 한 번 로드되면 다시 다운로드하지 않습니다
-        """
-    )
+        msg = gr.Textbox(placeholder="메시지 입력...", show_label=False, scale=9)
+        btn = gr.Button("전송", scale=1)
+
+    clear = gr.Button("초기화")
+
+    def submit(message, history):
+        return echo_chat(message, history), ""
+
+    btn.click(submit, [msg, chatbot], [chatbot, msg], queue=False)
+    msg.submit(submit, [msg, chatbot], [chatbot, msg], queue=False)
+    clear.click(lambda: [], outputs=chatbot, queue=False)

 if __name__ == "__main__":
-    # Do not invoke the queue system (prevents HTTP/2 SSE errors)
-    # In Gradio 5.x, the queue stays disabled unless .queue() is called
     demo.launch()
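
The echo path can also be sanity-checked locally without launching the UI (a hypothetical snippet, assuming the new app.py is importable as `app`):

```python
# Smoke test for the echo logic; not part of this commit.
from app import echo_chat

assert echo_chat("", []) == []                      # blank input: history unchanged
assert echo_chat("  ", []) == []                    # whitespace-only: unchanged
assert echo_chat("hi", []) == [["hi", "Echo: hi"]]  # normal input: echoed pair
print("echo_chat OK")
```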