feat: Add single model (DialoGPT-small) for incremental testing
Incremental deployment strategy:
- Single model only: DialoGPT-small (~350MB)
- Lazy loading (no preload at startup)
- Simplified error handling with full traceback
- queue=False on all events
- Pure Blocks implementation
This version will help identify whether the 500 errors are caused by:
- Model loading issues
- Memory constraints
- Transformers/torch compatibility
If this works, we can add more models incrementally.
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
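
The first suspect, model loading, can also be checked outside the Space entirely. Below is a minimal local smoke test — not part of this commit, and assuming only the `transformers` and `torch` packages from the Space's requirements:

```python
# Hypothetical local smoke test: if this fails, the 500s point to a
# model-loading/compatibility problem rather than anything Spaces-specific.
from transformers import AutoModelForCausalLM, AutoTokenizer

name = "microsoft/DialoGPT-small"
tokenizer = AutoTokenizer.from_pretrained(name)
model = AutoModelForCausalLM.from_pretrained(name)

# Report parameter count as a basic sanity check that weights loaded.
n_params = sum(p.numel() for p in model.parameters())
print(f"Loaded {name} with {n_params / 1e6:.0f}M parameters")
```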
app.py
CHANGED
@@ -1,34 +1,173 @@

```python
"""
Incremental version: Single model (DialoGPT-small only)
Testing model loading on HF Spaces
"""

import os
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import warnings

# Suppress torch_dtype deprecation warning
warnings.filterwarnings('ignore', message='.*torch_dtype.*deprecated.*')

# Get HF token from environment
HF_TOKEN = os.getenv("HF_TOKEN", None)

# Check device
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Single model only for testing
MODELS = {
    "microsoft/DialoGPT-small": {
        "name": "DialoGPT Small (English, fast)",
        "max_length": 80,
    },
}

# Model cache (populated lazily on first request)
loaded_models = {}
loaded_tokenizers = {}


def load_model(model_name):
    """Load model and tokenizer, caching them after the first call."""
    if model_name not in loaded_models:
        try:
            print(f"Loading model: {model_name}")

            # Load tokenizer
            tokenizer = AutoTokenizer.from_pretrained(
                model_name,
                token=HF_TOKEN,
                padding_side='left',
            )

            if tokenizer.pad_token is None:
                tokenizer.pad_token = tokenizer.eos_token

            # Load model
            model = AutoModelForCausalLM.from_pretrained(
                model_name,
                token=HF_TOKEN,
                torch_dtype=torch.float32,
                low_cpu_mem_usage=True,
            )

            model.to(device)
            model.eval()

            loaded_models[model_name] = model
            loaded_tokenizers[model_name] = tokenizer

            print(f"✅ Model {model_name} loaded successfully")

        except Exception as e:
            print(f"❌ Failed to load model {model_name}: {e}")
            import traceback
            print(traceback.format_exc())
            return None, None

    return loaded_models.get(model_name), loaded_tokenizers.get(model_name)


def chat_response(message, history):
    """Generate chatbot response"""
    if not message or not message.strip():
        return history

    try:
        model_name = "microsoft/DialoGPT-small"
        model, tokenizer = load_model(model_name)

        if model is None or tokenizer is None:
            return history + [[message, "❌ Could not load the model."]]

        model_config = MODELS[model_name]

        # Build conversation context from prior turns
        conversation = ""
        for user_msg, bot_msg in history:
            if user_msg:
                conversation += f"{user_msg}\n"
            if bot_msg:
                conversation += f"{bot_msg}\n"

        conversation += f"{message}\n"

        # Tokenize
        inputs = tokenizer.encode(conversation, return_tensors="pt").to(device)

        # Generate response
        with torch.no_grad():
            outputs = model.generate(
                inputs,
                max_new_tokens=model_config["max_length"],
                temperature=0.9,
                do_sample=True,
                pad_token_id=tokenizer.pad_token_id,
                eos_token_id=tokenizer.eos_token_id,
            )

        # Decode, then strip the prompt prefix to keep only the new text
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        response = response[len(conversation):].strip()

        if not response:
            response = "I understand. Could you tell me more?"

        return history + [[message, response]]

    except Exception as e:
        import traceback
        error_msg = str(e)
        print("=" * 50)
        print(f"Error: {error_msg}")
        print(traceback.format_exc())
        print("=" * 50)
        return history + [[message, f"❌ Error: {error_msg[:200]}"]]


print("✅ App initialized - model will load on first use")

# Create Gradio interface
with gr.Blocks(title="🤖 Simple Chatbot") as demo:
    gr.Markdown("""
    # 🤖 Simple Chatbot (Single Model Test)

    **Model**: DialoGPT Small (English conversation)
    - First message will be slow (model loading)
    - Subsequent messages will be faster
    """)

    chatbot = gr.Chatbot(height=400, type="tuples", show_label=False)

    with gr.Row():
        msg = gr.Textbox(
            placeholder="Type a message in English...",
            show_label=False,
            scale=9,
        )
        btn = gr.Button("Send", scale=1, variant="primary")

    clear = gr.Button("🗑️ Clear Chat", size="sm")

    def submit(message, history):
        # Return updated history and clear the textbox
        return chat_response(message, history), ""

    btn.click(submit, [msg, chatbot], [chatbot, msg], queue=False)
    msg.submit(submit, [msg, chatbot], [chatbot, msg], queue=False)
    clear.click(lambda: [], outputs=chatbot, queue=False)

    gr.Markdown("""
    ---
    **Note**:
    - This is a test version with only one model
    - First response will take 5-10 seconds (model loading)
    - Uses DialoGPT-small (~350MB)
    """)

if __name__ == "__main__":
    demo.launch()
```
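
If loading succeeds but replies look off, prompt formatting is a likely next suspect: DialoGPT's model card separates dialogue turns with `eos_token`, while `chat_response` above joins turns with `"\n"`. A minimal sketch of the card-style encoding, assuming the same `tokenizer` and `history` shapes as the app (`build_dialogpt_inputs` is a hypothetical helper, not part of this commit):

```python
def build_dialogpt_inputs(tokenizer, history, message, device="cpu"):
    """Encode chat turns the way the DialoGPT model card does:
    turns separated by eos_token rather than newlines."""
    turns = []
    for user_msg, bot_msg in history:
        if user_msg:
            turns.append(user_msg)
        if bot_msg:
            turns.append(bot_msg)
    turns.append(message)
    text = tokenizer.eos_token.join(turns) + tokenizer.eos_token
    return tokenizer.encode(text, return_tensors="pt").to(device)
```

Decoding only the newly generated ids (`outputs[0][inputs.shape[-1]:]`) would also sidestep the character-offset slice `response[len(conversation):]`, which can misalign whenever the decoded output doesn't reproduce the prompt text exactly.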