anaspro
committed on
Commit
·
efc21db
1
Parent(s):
df89878
Disable @spaces.GPU decorator for T4 dedicated GPU
Browse files
- Comment out @spaces.GPU() - not needed with dedicated T4
- Add comment explaining that a dedicated T4 is used instead of ZeroGPU
- Model loads on startup and stays in memory
- No quota limits, always ready for production
app.py
CHANGED
|
@@ -68,7 +68,7 @@ def format_conversation_history(chat_history):
|
|
| 68 |
messages.append({"role": role, "content": content})
|
| 69 |
return messages
|
| 70 |
|
| 71 |
-
@spaces.GPU()
|
| 72 |
def generate_response(input_data, chat_history, max_new_tokens, temperature, top_p, top_k, repetition_penalty):
|
| 73 |
new_message = {"role": "user", "content": input_data}
|
| 74 |
processed_history = format_conversation_history(chat_history)
|
|
|
|
| 68 |
messages.append({"role": role, "content": content})
|
| 69 |
return messages
|
| 70 |
|
| 71 |
+
# @spaces.GPU() # Disabled - using dedicated T4 GPU instead of ZeroGPU
|
| 72 |
def generate_response(input_data, chat_history, max_new_tokens, temperature, top_p, top_k, repetition_penalty):
|
| 73 |
new_message = {"role": "user", "content": input_data}
|
| 74 |
processed_history = format_conversation_history(chat_history)
|