Update app.py
Browse files
app.py
CHANGED
|
@@ -2,12 +2,11 @@ from fastapi import FastAPI, HTTPException, Request
|
|
| 2 |
from pydantic import BaseModel
|
| 3 |
import uvicorn
|
| 4 |
import requests
|
| 5 |
-
import asyncio
|
| 6 |
import os
|
| 7 |
import io
|
| 8 |
import time
|
| 9 |
from typing import List, Dict, Any
|
| 10 |
-
from llama_cpp import Llama #
|
| 11 |
from tqdm import tqdm
|
| 12 |
|
| 13 |
app = FastAPI()
|
|
@@ -69,7 +68,7 @@ class ModelManager:
|
|
| 69 |
temp_filename = await self.save_model_to_temp_file(model_config)
|
| 70 |
start_time = time.time()
|
| 71 |
print(f"Cargando modelo desde {temp_filename}")
|
| 72 |
-
llama = Llama(temp_filename) #
|
| 73 |
end_time = time.time()
|
| 74 |
load_duration = end_time - start_time
|
| 75 |
if load_duration > 0:
|
|
@@ -113,9 +112,10 @@ class ModelManager:
|
|
| 113 |
async with self.index_lock:
|
| 114 |
part_name = f"part_{part_index}"
|
| 115 |
print(f"Indexando parte {part_index}")
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
|
|
|
| 119 |
|
| 120 |
async def generate_response(self, user_input):
|
| 121 |
results = []
|
|
|
|
| 2 |
from pydantic import BaseModel
|
| 3 |
import uvicorn
|
| 4 |
import requests
|
|
|
|
| 5 |
import os
|
| 6 |
import io
|
| 7 |
import time
|
| 8 |
from typing import List, Dict, Any
|
| 9 |
+
from llama_cpp import Llama # Make sure to adjust this import to match the library you actually use
|
| 10 |
from tqdm import tqdm
|
| 11 |
|
| 12 |
app = FastAPI()
|
|
|
|
| 68 |
temp_filename = await self.save_model_to_temp_file(model_config)
|
| 69 |
start_time = time.time()
|
| 70 |
print(f"Cargando modelo desde {temp_filename}")
|
| 71 |
+
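llama = Llama.load(temp_filename) # Use the appropriate method to load the model. NOTE(review): llama-cpp-python documents loading via the constructor, Llama(model_path=...); confirm that a Llama.load classmethod exists before relying on this call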
|
| 72 |
end_time = time.time()
|
| 73 |
load_duration = end_time - start_time
|
| 74 |
if load_duration > 0:
|
|
|
|
| 112 |
async with self.index_lock:
|
| 113 |
part_name = f"part_{part_index}"
|
| 114 |
print(f"Indexando parte {part_index}")
|
| 115 |
+
# Use a temporary file name for each part of the model
|
| 116 |
+
with open(f"/tmp/{part_name}.gguf", 'wb') as f:
|
| 117 |
+
f.write(model_part.getvalue())
|
| 118 |
+
print(f"Parte {part_index} indexada y guardada")
|
| 119 |
|
| 120 |
async def generate_response(self, user_input):
|
| 121 |
results = []
|