Update app.py
app.py CHANGED
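The commit drops the `@spaces.GPU(duration=0)` decorators from `load_model` and `generate_chat_response`, and replaces the silent `except Exception: pass` blocks with logged per-model error handling, so a failed load or generation is reported and skipped instead of swallowed.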
@@ -53,9 +53,12 @@ class ModelManager:
         self.models = []
         self.loaded = False

-    @spaces.GPU(duration=0)
     def load_model(self, model_config):
-        return {"model": Llama.from_pretrained(repo_id=model_config['repo_id'], filename=model_config['filename']), "name": model_config['name']}
+        try:
+            return {"model": Llama.from_pretrained(repo_id=model_config['repo_id'], filename=model_config['filename']), "name": model_config['name']}
+        except Exception as e:
+            print(f"Error loading model {model_config['name']}: {e}")
+            return None

     def load_all_models(self):
         if self.loaded:
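The new `load_model` catches loader failures and returns `None` instead of raising, so one bad checkpoint no longer aborts startup. A minimal standalone sketch of the pattern, assuming llama-cpp-python; the config keys (`repo_id`, `filename`, `name`) come from the diff, and the class is reduced to a plain function for illustration:

```python
# Minimal sketch of the guarded loader, assuming llama-cpp-python.
from llama_cpp import Llama

def load_model(model_config):
    try:
        # from_pretrained fetches the GGUF file from the Hugging Face Hub.
        model = Llama.from_pretrained(
            repo_id=model_config["repo_id"],
            filename=model_config["filename"],
        )
        return {"model": model, "name": model_config["name"]}
    except Exception as e:
        # Return None rather than raising, so callers can skip broken models.
        print(f"Error loading model {model_config['name']}: {e}")
        return None
```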
@@ -65,11 +68,9 @@ class ModelManager:
         futures = [executor.submit(self.load_model, config) for config in model_configs]
         models = []
         for future in as_completed(futures):
-            try:
-                model = future.result()
+            model = future.result()
+            if model:
                 models.append(model)
-            except Exception:
-                pass

         self.models = models
         self.loaded = True
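With `load_model` returning `None` on failure, the collection loop no longer needs its own try/except; it simply filters out failed loads. A sketch of the surrounding method, assuming the elided context manager is a `ThreadPoolExecutor` (the executor setup and `model_configs` sit outside the hunk):

```python
# Sketch of the surrounding method; ThreadPoolExecutor is an assumption,
# as is the origin of model_configs (both sit outside the hunk).
from concurrent.futures import ThreadPoolExecutor, as_completed

class ModelManager:
    def load_all_models(self):
        if self.loaded:
            return
        with ThreadPoolExecutor() as executor:
            futures = [executor.submit(self.load_model, config) for config in model_configs]
            models = []
            for future in as_completed(futures):
                # load_model returns None on failure, so result() needs no
                # try/except here; failed loads are simply filtered out.
                model = future.result()
                if model:
                    models.append(model)
        self.models = models
        self.loaded = True
```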
@@ -84,7 +85,6 @@ class ChatRequest(BaseModel):
     top_p: float = 0.95
     temperature: float = 0.7

-@spaces.GPU(duration=0)
 def generate_chat_response(request, model_data):
     try:
         user_input = normalize_input(request.message)
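This hunk only removes the `@spaces.GPU(duration=0)` decorator from `generate_chat_response`. For reference, the request schema implied by the visible fields and the `request.message` access; any fields beyond these three are not shown in the diff:

```python
# Request schema implied by the diff; further fields are unknown.
from pydantic import BaseModel

class ChatRequest(BaseModel):
    message: str
    top_p: float = 0.95
    temperature: float = 0.7
```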
@@ -138,8 +138,8 @@ def generate_chat(request: ChatRequest):
         try:
             response = future.result()
             responses.append(response)
-        except Exception:
-            pass
+        except Exception as e:
+            print(f"Error in response generation: {e}")

     if not responses:
         raise HTTPException(status_code=500, detail="Error: No responses generated.")
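The endpoint-side loop now logs per-model failures instead of silently dropping them, and still returns a 500 only when every model fails. A sketch of the fan-out this hunk sits in; the route path, the executor, and the `model_manager` name are assumptions, and only the loop body's error handling is taken from the diff:

```python
# Sketch of the fan-out around this hunk; builds on the sketches above.
# The route path, executor, and model_manager name are assumptions.
from concurrent.futures import ThreadPoolExecutor, as_completed
from fastapi import FastAPI, HTTPException

app = FastAPI()

@app.post("/chat")  # hypothetical route, not shown in the diff
def generate_chat(request: ChatRequest):
    responses = []
    with ThreadPoolExecutor() as executor:
        futures = [
            executor.submit(generate_chat_response, request, model_data)
            for model_data in model_manager.models  # model_manager is assumed
        ]
        for future in as_completed(futures):
            try:
                response = future.result()
                responses.append(response)
            except Exception as e:
                # Log and continue so one failing model cannot fail the request.
                print(f"Error in response generation: {e}")
    if not responses:
        raise HTTPException(status_code=500, detail="Error: No responses generated.")
    return {"responses": responses}
```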