Rajhuggingface4253 committed
Commit e51d0e4 · verified · 1 Parent(s): 3f62f30

Update app.py

Files changed (1):
  1. app.py +37 -19
app.py CHANGED
@@ -23,16 +23,17 @@ regular_servers = [
     "https://rajhuggingface4253-compressor3pro.hf.space"
 ]
 
-# FIXED: Models with CORRECT prompt format that matches your model's expectations
+# OPTIMIZED: Simpler prompts for faster responses
 models_to_warm = [
     {
         "name": "Qwen 1",
         "url": "https://rajhuggingface4253-qwen.hf.space",
         "endpoint": "/chat",
         "type": "streaming_chat",
+        "timeout": 45.0,  # Increased timeout
         "payload": {
-            "prompt": "--- HISTORY START ---\nUser: Hello, are you ready? Respond with just 'OK'\n--- HISTORY END ---\n\nUser's latest message: \"Hello, are you ready? Respond with just 'OK'\"",
-            "max_new_tokens": 50,
+            "prompt": "Hello, respond with OK",
+            "max_new_tokens": 10,  # Shorter response
             "temperature": 0.1,
             "enable_code_execution": False,
             "enable_web_search": False
@@ -43,9 +44,10 @@ models_to_warm = [
         "url": "https://rajhuggingface4253-qwen2.hf.space",
         "endpoint": "/chat",
         "type": "streaming_chat",
+        "timeout": 45.0,
         "payload": {
-            "prompt": "--- HISTORY START ---\nUser: Say 'ready' if you're working\n--- HISTORY END ---\n\nUser's latest message: \"Say 'ready' if you're working\"",
-            "max_new_tokens": 50,
+            "prompt": "Hello, respond with OK",
+            "max_new_tokens": 10,
             "temperature": 0.1,
             "enable_code_execution": False,
             "enable_web_search": False
@@ -56,9 +58,10 @@ models_to_warm = [
         "url": "https://rajhuggingface4253-qwen3.hf.space",
         "endpoint": "/chat",
         "type": "streaming_chat",
+        "timeout": 45.0,
         "payload": {
-            "prompt": "--- HISTORY START ---\nUser: Ping! Respond with 'pong'\n--- HISTORY END ---\n\nUser's latest message: \"Ping! Respond with 'pong'\"",
-            "max_new_tokens": 50,
+            "prompt": "Hello, respond with OK",
+            "max_new_tokens": 10,
             "temperature": 0.1,
             "enable_code_execution": False,
             "enable_web_search": False
@@ -68,7 +71,8 @@ models_to_warm = [
         "name": "Kokoro TTS",
         "url": "https://rajhuggingface4253-koko.hf.space",
         "endpoint": "/health",
-        "type": "health_check"
+        "type": "health_check",
+        "timeout": 15.0
     }
 ]
 
@@ -101,10 +105,10 @@ async def ping_server(url: str) -> Dict:
         }
 
 async def warmup_chat_model(model_config: Dict) -> Dict:
-    """Warm up chat models with CORRECT prompt format"""
+    """Warm up chat models with optimized approach"""
     try:
         start_time = time.time()
-        async with httpx.AsyncClient(timeout=30.0) as client:
+        async with httpx.AsyncClient(timeout=model_config.get('timeout', 45.0)) as client:
 
             payload = model_config['payload']
             api_url = f"{model_config['url']}{model_config['endpoint']}"
@@ -113,7 +117,7 @@ async def warmup_chat_model(model_config: Dict) -> Dict:
             response_time = round((time.time() - start_time) * 1000, 1)
 
             if response.status_code == 200:
-                # Try to read streaming response to verify AI is actually responding
+                # Try to read streaming response
                 try:
                     collected_response = ""
                     async for chunk in response.aiter_text():
@@ -121,12 +125,12 @@ async def warmup_chat_model(model_config: Dict) -> Dict:
                             collected_response += chunk
 
                     # Check if we got a meaningful AI response
-                    if collected_response and len(collected_response) > 10:
+                    if collected_response and len(collected_response.strip()) > 5:
                         return {
                             'status': 'success',
                             'response_time_ms': response_time,
                             'status_code': response.status_code,
-                            'ai_response': collected_response[:100],
+                            'ai_response': collected_response[:100].strip(),
                             'got_ai_response': True,
                             'timestamp': datetime.now().isoformat()
                         }
@@ -135,29 +139,43 @@ async def warmup_chat_model(model_config: Dict) -> Dict:
                             'status': 'success',
                             'response_time_ms': response_time,
                             'status_code': response.status_code,
-                            'ai_response': 'No content in stream',
+                            'ai_response': 'Empty response',
                             'got_ai_response': False,
                             'timestamp': datetime.now().isoformat()
                         }
-                except:
-                    # If streaming read fails, but status is 200, still consider it success
+                except Exception as e:
                     return {
                         'status': 'success',
                         'response_time_ms': response_time,
                         'status_code': response.status_code,
-                        'ai_response': 'Stream endpoint responded',
+                        'ai_response': f'Stream error: {str(e)}',
                         'got_ai_response': False,
                         'timestamp': datetime.now().isoformat()
                     }
             else:
+                # Try to get error details
+                error_detail = ""
+                try:
+                    error_text = await response.text()
+                    error_detail = f" - {error_text[:100]}"
+                except:
+                    pass
+
                 return {
                     'status': 'error',
                     'response_time_ms': response_time,
                     'status_code': response.status_code,
-                    'error': f"HTTP {response.status_code}",
+                    'error': f"HTTP {response.status_code}{error_detail}",
                     'timestamp': datetime.now().isoformat()
                 }
 
+    except asyncio.TimeoutError:
+        return {
+            'status': 'error',
+            'error': 'Request timeout',
+            'response_time_ms': round((time.time() - start_time) * 1000, 1),
+            'timestamp': datetime.now().isoformat()
+        }
     except Exception as e:
         return {
             'status': 'error',
@@ -170,7 +188,7 @@ async def warmup_health_model(model_config: Dict) -> Dict:
     """Warm up models using health endpoints"""
     try:
         start_time = time.time()
-        async with httpx.AsyncClient(timeout=15.0) as client:
+        async with httpx.AsyncClient(timeout=model_config.get('timeout', 15.0)) as client:
 
             api_url = f"{model_config['url']}{model_config['endpoint']}"
             response = await client.get(api_url)
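
Two httpx details in the new error handling are worth flagging. httpx exposes the decoded body as the response.text property (for a streamed response the body must first be loaded with await response.aread()), so "await response.text()" as written above would raise a TypeError rather than return the error body; and httpx signals timeouts with httpx.TimeoutException, which "except asyncio.TimeoutError" does not catch. Below is a minimal sketch of the warmup call with those two paths adjusted. It assumes the request is made with httpx's stream() API; the request line itself falls outside the hunks shown, so that part is an assumption, not the commit's code.

import time
from datetime import datetime
from typing import Dict

import httpx

async def warmup_chat_model_sketch(model_config: Dict) -> Dict:
    """Hedged variant of warmup_chat_model; stream() usage is assumed."""
    start_time = time.time()
    try:
        async with httpx.AsyncClient(timeout=model_config.get("timeout", 45.0)) as client:
            api_url = f"{model_config['url']}{model_config['endpoint']}"
            # stream() defers reading the body until we iterate over it
            async with client.stream("POST", api_url, json=model_config["payload"]) as response:
                response_time = round((time.time() - start_time) * 1000, 1)
                if response.status_code == 200:
                    collected = ""
                    async for chunk in response.aiter_text():
                        collected += chunk
                    return {
                        "status": "success",
                        "response_time_ms": response_time,
                        "status_code": response.status_code,
                        "ai_response": collected[:100].strip(),
                        "got_ai_response": len(collected.strip()) > 5,
                        "timestamp": datetime.now().isoformat(),
                    }
                # Load the body first; response.text is a property, not awaitable
                await response.aread()
                return {
                    "status": "error",
                    "response_time_ms": response_time,
                    "status_code": response.status_code,
                    "error": f"HTTP {response.status_code} - {response.text[:100]}",
                    "timestamp": datetime.now().isoformat(),
                }
    except httpx.TimeoutException:  # what httpx actually raises on timeout
        return {
            "status": "error",
            "error": "Request timeout",
            "response_time_ms": round((time.time() - start_time) * 1000, 1),
            "timestamp": datetime.now().isoformat(),
        }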
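
The per-model "type" and "timeout" fields suggest the caller dispatches over models_to_warm by type. The commit does not show that caller, so the driver below is a hypothetical sketch: run_warmup and the handlers mapping are illustrative, while warmup_chat_model, warmup_health_model, and models_to_warm are the names defined in app.py.

import asyncio

async def run_warmup():
    # Map each entry's "type" field to the matching warmup coroutine
    handlers = {
        "streaming_chat": warmup_chat_model,
        "health_check": warmup_health_model,
    }
    tasks = [handlers[model["type"]](model) for model in models_to_warm]
    # Each client carries its own per-model timeout, so gather() can
    # warm all spaces concurrently; results come back in input order.
    results = await asyncio.gather(*tasks)
    for model, result in zip(models_to_warm, results):
        print(f"{model['name']}: {result['status']} "
              f"({result.get('response_time_ms', '?')} ms)")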