Update app.py
app.py
CHANGED
@@ -4,7 +4,6 @@ from llama_cpp import Llama
 from concurrent.futures import ThreadPoolExecutor, as_completed
 import uvicorn
 import huggingface_hub
-import spaces
 import re
 from dotenv import load_dotenv
 
@@ -75,6 +74,7 @@ class ModelManager:
         return models
 
 model_manager = ModelManager()
+model_manager.load_all_models()  # Load models once at startup
 
 class ChatRequest(BaseModel):
     message: str
@@ -119,10 +119,8 @@ def remove_repetitive_responses(responses):
     return unique_responses
 
 @app.post("/generate")
-@spaces.GPU(duration=0)
 async def generate(request: ChatRequest):
     try:
-        global_data['models'] = model_manager.load_all_models()
         responses = []
         with ThreadPoolExecutor() as executor:
             futures = [executor.submit(generate_chat_response, request, model_data) for model_data in global_data['models']]
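For context, a minimal runnable sketch of the pattern this commit moves to: the ZeroGPU hooks (`import spaces` and the `@spaces.GPU(duration=0)` decorator) are dropped, and models are loaded once at startup instead of on every `/generate` request. Only the names `ModelManager`, `global_data`, `ChatRequest`, `generate_chat_response`, and the endpoint signature come from the diff; the bodies below are hypothetical stand-ins.

# Sketch of the load-once pattern adopted by this commit.
# ModelManager, global_data, and generate_chat_response bodies are
# hypothetical stand-ins; only their names appear in the diff.
from concurrent.futures import ThreadPoolExecutor, as_completed

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel

app = FastAPI()
global_data = {'models': []}

class ModelManager:
    def load_all_models(self):
        # Hypothetical: return a list of already-initialized model handles.
        return [{'name': 'model-a'}, {'name': 'model-b'}]

class ChatRequest(BaseModel):
    message: str

# Load once at module import (i.e., app startup), not per request.
model_manager = ModelManager()
global_data['models'] = model_manager.load_all_models()

def generate_chat_response(request, model_data):
    # Hypothetical stand-in for the real inference call.
    return f"{model_data['name']}: {request.message}"

@app.post("/generate")
async def generate(request: ChatRequest):
    try:
        # Fan out one generation per preloaded model, as in the diff.
        with ThreadPoolExecutor() as executor:
            futures = [executor.submit(generate_chat_response, request, model_data)
                       for model_data in global_data['models']]
            responses = [future.result() for future in as_completed(futures)]
        return {'responses': responses}
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))

One detail worth noting: the diff's new startup line calls model_manager.load_all_models() without assigning the result, while the handler still reads global_data['models'], so presumably the real load_all_models populates global_data internally. The sketch makes that assignment explicit to stay self-contained.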