alex4cip Claude committed on
Commit
718425d
·
1 Parent(s): c7bf517

feat: Add single model (DialoGPT-small) for incremental testing

Browse files

Incremental deployment strategy:
- Single model only: DialoGPT-small (~350MB)
- Lazy loading (no preload at startup)
- Simplified error handling with full traceback
- queue=False on all events
- Pure Blocks implementation

This version will help identify if the 500 errors are:
- Model loading issues
- Memory constraints
- Transformers/torch compatibility

If this works, we can add more models incrementally.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (1) hide show
  1. app.py +158 -19
app.py CHANGED
@@ -1,34 +1,173 @@
1
  """
2
- Minimal test version for HF Spaces debugging
3
- No AI models, just echo functionality
4
  """
5
 
 
6
  import gradio as gr
 
 
 
7
 
8
- def echo_chat(message, history):
9
- """Simple echo without any AI"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  if not message or not message.strip():
11
  return history
12
- return history + [[message, f"Echo: {message}"]]
13
-
14
- # Minimal Blocks interface
15
- with gr.Blocks(title="Test Chatbot") as demo:
16
- gr.Markdown("# πŸ€– Minimal Test Chatbot")
17
-
18
- chatbot = gr.Chatbot(height=400, type="tuples")
19
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  with gr.Row():
21
- msg = gr.Textbox(placeholder="λ©”μ‹œμ§€ μž…λ ₯...", show_label=False, scale=9)
22
- btn = gr.Button("전솑", scale=1)
23
-
24
- clear = gr.Button("μ΄ˆκΈ°ν™”")
25
-
 
 
 
 
26
  def submit(message, history):
27
- return echo_chat(message, history), ""
28
-
29
  btn.click(submit, [msg, chatbot], [chatbot, msg], queue=False)
30
  msg.submit(submit, [msg, chatbot], [chatbot, msg], queue=False)
31
  clear.click(lambda: [], outputs=chatbot, queue=False)
32
 
 
 
 
 
 
 
 
 
33
  if __name__ == "__main__":
34
  demo.launch()
 
1
  """
2
+ Incremental version: Single model (DialoGPT-small only)
3
+ Testing model loading on HF Spaces
4
  """
5
 
6
+ import os
7
  import gradio as gr
8
+ from transformers import AutoModelForCausalLM, AutoTokenizer
9
+ import torch
10
+ import warnings
11
 
12
# Silence the torch_dtype deprecation warning matched by this message pattern.
warnings.filterwarnings('ignore', message='.*torch_dtype.*deprecated.*')

# Hugging Face access token, read from the environment (None when unset).
HF_TOKEN = os.getenv("HF_TOKEN", None)

# Run on the GPU when one is visible to torch, otherwise on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Registry of selectable models, keyed by Hugging Face model id.
# Only a single model is registered in this test build.
#   name       - label shown to the user (Korean: "English, fast")
#   max_length - passed to generate() as max_new_tokens by chat_response
MODELS = {
    "microsoft/DialoGPT-small": {
        "name": "DialoGPT Small (영어, 빠름)",
        "max_length": 80,
    },
}

# Lazy-load caches filled by load_model(), keyed by model id.
loaded_models = {}
loaded_tokenizers = {}
33
+
34
+
35
def load_model(model_name):
    """Return the ``(model, tokenizer)`` pair for *model_name*, loading it once.

    The first call for a given name downloads/loads the tokenizer and model,
    moves the model to ``device``, switches it to eval mode and stores both in
    the module-level caches. Later calls return the cached objects.

    Args:
        model_name: Hugging Face model id to load.

    Returns:
        ``(model, tokenizer)`` on success, or ``(None, None)`` when loading
        failed. Failures are printed with a full traceback and never raised.
    """
    if model_name in loaded_models:
        return loaded_models.get(model_name), loaded_tokenizers.get(model_name)

    try:
        print(f"Loading model: {model_name}")

        tokenizer = AutoTokenizer.from_pretrained(
            model_name,
            token=HF_TOKEN,
            padding_side='left',
        )

        # Fall back to EOS for padding when the tokenizer defines no pad
        # token, so pad_token_id is usable at generation time.
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token

        # NOTE(review): low_cpu_mem_usage=True may need the `accelerate`
        # package on some transformers versions - confirm requirements.txt.
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            token=HF_TOKEN,
            torch_dtype=torch.float32,
            low_cpu_mem_usage=True,
        )

        model.to(device)
        model.eval()

        # Publish to the caches only after both objects loaded successfully.
        loaded_models[model_name] = model
        loaded_tokenizers[model_name] = tokenizer

        print(f"✅ Model {model_name} loaded successfully")

    except Exception as e:
        # Top-level boundary for the UI: report the failure and let the
        # caller show a friendly message instead of crashing the request.
        print(f"❌ Failed to load model {model_name}: {e}")
        import traceback
        print(traceback.format_exc())
        return None, None

    return loaded_models.get(model_name), loaded_tokenizers.get(model_name)
74
+
75
+
76
def chat_response(message, history):
    """Generate a chatbot reply and return the extended history.

    Args:
        message: The user's new message. Empty or whitespace-only input is
            ignored.
        history: List of ``[user_message, bot_message]`` pairs for the
            conversation so far. It is not mutated.

    Returns:
        ``history`` unchanged for blank input; otherwise a new list equal to
        ``history`` plus one ``[message, reply]`` pair. On any failure the
        reply is an error string, so this function never raises.
    """
    if not message or not message.strip():
        return history

    try:
        model_name = "microsoft/DialoGPT-small"
        model, tokenizer = load_model(model_name)

        if model is None or tokenizer is None:
            return history + [[message, "❌ 모델을 로드할 수 없습니다."]]

        max_new_tokens = MODELS[model_name]["max_length"]

        # Flatten the history into an ordered list of turns, skipping empty
        # slots, then add the new message.
        turns = []
        for user_msg, bot_msg in history:
            if user_msg:
                turns.append(str(user_msg))
            if bot_msg:
                turns.append(str(bot_msg))
        turns.append(str(message))

        # DialoGPT separates dialogue turns with the EOS token, and every
        # turn (including the last) is terminated by it.
        eos = tokenizer.eos_token
        prompt = eos.join(turns) + eos

        encoded = tokenizer(prompt, return_tensors="pt")
        input_ids = encoded["input_ids"]
        attention_mask = encoded["attention_mask"]

        # Keep only the most recent tokens so prompt + reply fits inside the
        # model's context window; long chats would otherwise fail.
        context_limit = getattr(model.config, "max_position_embeddings", 1024)
        budget = max(context_limit - max_new_tokens, 1)
        input_ids = input_ids[:, -budget:].to(device)
        attention_mask = attention_mask[:, -budget:].to(device)

        with torch.no_grad():
            outputs = model.generate(
                input_ids,
                # Explicit mask: pad and EOS may share an id, so generate()
                # cannot infer it reliably.
                attention_mask=attention_mask,
                max_new_tokens=max_new_tokens,
                temperature=0.9,
                do_sample=True,
                pad_token_id=tokenizer.pad_token_id,
                eos_token_id=tokenizer.eos_token_id,
            )

        # Decode only the newly generated tokens. Slicing by token count is
        # exact, unlike cutting the decoded text by prompt string length.
        new_tokens = outputs[0][input_ids.shape[-1]:]
        response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

        if not response:
            response = "I understand. Could you tell me more?"

        return history + [[message, response]]

    except Exception as e:
        # UI boundary: log everything, show a truncated error in the chat.
        import traceback
        error_msg = str(e)
        print("=" * 50)
        print(f"Error: {error_msg}")
        print(traceback.format_exc())
        print("=" * 50)
        return history + [[message, f"❌ 오류: {error_msg[:200]}"]]
131
+
132
+
133
print("✅ App initialized - model will load on first use")

# Build the Gradio UI. Every event is registered with queue=False.
with gr.Blocks(title="🤖 Simple Chatbot") as demo:
    gr.Markdown("""
    # 🤖 Simple Chatbot (Single Model Test)

    **Model**: DialoGPT Small (English conversation)
    - First message will be slow (model loading)
    - Subsequent messages will be faster
    """)

    # History is exchanged as [user, bot] pairs ("tuples" format), which is
    # the shape chat_response() consumes and returns.
    chatbot = gr.Chatbot(height=400, type="tuples", show_label=False)

    with gr.Row():
        msg = gr.Textbox(
            placeholder="Type a message in English...",
            show_label=False,
            scale=9,
        )
        btn = gr.Button("Send", scale=1, variant="primary")

    clear = gr.Button("🗑️ Clear Chat", size="sm")

    def submit(message, history):
        """Return the updated chat history and an empty string that clears the textbox."""
        return chat_response(message, history), ""

    # The Send button and pressing Enter in the textbox do the same thing.
    btn.click(submit, [msg, chatbot], [chatbot, msg], queue=False)
    msg.submit(submit, [msg, chatbot], [chatbot, msg], queue=False)
    # Clearing replaces the chat history with an empty list.
    clear.click(lambda: [], outputs=chatbot, queue=False)

    gr.Markdown("""
    ---
    **Note**:
    - This is a test version with only one model
    - First response will take 5-10 seconds (model loading)
    - Uses DialoGPT-small (~350MB)
    """)

if __name__ == "__main__":
    demo.launch()