AI-Talent-Force Claude Sonnet 4.5 committed
Commit eaa113d · Parent(s): c8d6960
Optimize inference speed and fix Spaces compatibility
- Updated spaces to >=0.43.0 (fixes hot-reload error)
- Fixed Gradio 6.0 theme deprecation warning
- Added GPU duration=60s to keep the GPU allocated between requests (see the sketch below)
- Reduced max_new_tokens from 512 to 256 for faster responses
- Limited conversation history to last 5 exchanges for speed
- Reduced tokenization max_length from 4096 to 2048
- Added use_cache=True for faster generation
- Disabled SSR mode in launch
These changes should significantly reduce response time.
Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
- app.py +11 -9
- requirements.txt +1 -1
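For context on the GPU duration change: on ZeroGPU Spaces, the spaces.GPU decorator takes an optional duration argument (in seconds) that bounds how long the GPU is held for each decorated call; undecorated code runs on CPU, which is why the decorator sits directly on the chat handler. A minimal sketch of the pattern, assuming only the spaces package; generate_reply is a hypothetical stand-in for the real chat_with_ceo handler:

import spaces

@spaces.GPU(duration=60)  # hold the GPU for up to 60 seconds per call
def generate_reply(message: str) -> str:
    # Model inference would run here while the GPU is attached.
    return message  # placeholder body for the sketch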
app.py CHANGED
@@ -47,7 +47,7 @@ print("🎯 CEO AI EXECUTIVE IS READY!")
 print("=" * 60)
 print("Model is loaded in memory and ready for fast inference.\n")
 
-@spaces.GPU
+@spaces.GPU(duration=60)
 def chat_with_ceo(message, history):
     """
     Chat function that responds like the CEO
@@ -55,9 +55,10 @@ def chat_with_ceo(message, history):
     message: User's current message
     history: List of previous messages [[user_msg, bot_msg], ...]
     """
-    # Build conversation context
+    # Build conversation context (limit history to last 5 exchanges for speed)
     conversation = []
-    for user_msg, bot_msg in history:
+    recent_history = history[-5:] if len(history) > 5 else history
+    for user_msg, bot_msg in recent_history:
         conversation.append({"role": "user", "content": user_msg})
         conversation.append({"role": "assistant", "content": bot_msg})
 
@@ -71,20 +72,21 @@ def chat_with_ceo(message, history):
     )
 
     # Tokenize
-    inputs = tokenizer(prompt, return_tensors="pt", truncate=True, max_length=4096)
+    inputs = tokenizer(prompt, return_tensors="pt", truncate=True, max_length=2048)
     inputs = {k: v.to(model.device) for k, v in inputs.items()}
 
-    # Generate response
+    # Generate response with optimized parameters for speed
    with torch.no_grad():
         outputs = model.generate(
             **inputs,
-            max_new_tokens=512,
+            max_new_tokens=256,
             temperature=0.7,
             top_p=0.9,
             do_sample=True,
             repetition_penalty=1.1,
             pad_token_id=tokenizer.pad_token_id,
-            eos_token_id=tokenizer.eos_token_id
+            eos_token_id=tokenizer.eos_token_id,
+            use_cache=True
         )
 
     # Decode response
@@ -92,7 +94,7 @@ def chat_with_ceo(message, history):
     return response
 
 # Create Gradio interface
-with gr.Blocks(theme=gr.themes.Soft()) as demo:
+with gr.Blocks() as demo:
     gr.Markdown(
         """
         # 🎯 CEO AI Executive
@@ -159,4 +161,4 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
 
 if __name__ == "__main__":
     demo.queue()
-    demo.launch()
+    demo.launch(share=False, ssr_mode=False)
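One caveat on the tokenization line above: in the Hugging Face transformers API the keyword is truncation, not truncate, and unrecognized keyword arguments are typically dropped with a warning, in which case max_length=2048 would not actually trim long prompts. A sketch of the call as the API documents it, reusing the tokenizer and prompt objects already defined in app.py:

inputs = tokenizer(
    prompt,
    return_tensors="pt",
    truncation=True,  # documented kwarg; truncate=True is not recognized
    max_length=2048,  # only enforced when truncation is enabled
)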
requirements.txt CHANGED
@@ -4,6 +4,6 @@ torch==2.5.1
 peft==0.18.1
 accelerate==1.2.1
 safetensors==0.4.5
-spaces
+spaces>=0.43.0
 bitsandbytes>=0.46.1
 audioop-lts
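To verify that the bumped lower bound actually resolves on the Space (for example, in the startup logs), a one-line check using only the standard library:

from importlib.metadata import version
print("spaces version:", version("spaces"))  # should print 0.43.0 or later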