Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -5,20 +5,11 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
| 5 |
import uvicorn
|
| 6 |
import re
|
| 7 |
from dotenv import load_dotenv
|
| 8 |
-
import gradio as gr
|
| 9 |
-
from spaces import ZeroGPU
|
| 10 |
|
| 11 |
load_dotenv()
|
| 12 |
|
| 13 |
app = FastAPI()
|
| 14 |
|
| 15 |
-
# Inicializar ZeroGPU
|
| 16 |
-
try:
|
| 17 |
-
ZeroGPU.initialize()
|
| 18 |
-
except Exception as e:
|
| 19 |
-
print(f"ZeroGPU initialization failed: {e}")
|
| 20 |
-
|
| 21 |
-
# Diccionario global para almacenar modelos y tokens
|
| 22 |
global_data = {
|
| 23 |
'models': {},
|
| 24 |
'tokens': {
|
|
@@ -69,22 +60,18 @@ class ModelManager:
|
|
| 69 |
def load_all_models(self):
|
| 70 |
if self.loaded:
|
| 71 |
return global_data['models']
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
return global_data['models']
|
| 85 |
-
except Exception as e:
|
| 86 |
-
print(f"Error loading models: {e}")
|
| 87 |
-
return {}
|
| 88 |
|
| 89 |
model_manager = ModelManager()
|
| 90 |
model_manager.load_all_models()
|
|
@@ -111,7 +98,6 @@ def remove_duplicates(text):
|
|
| 111 |
seen_lines.add(line)
|
| 112 |
return '\n'.join(unique_lines)
|
| 113 |
|
| 114 |
-
@spaces.GPU(duration=0)
|
| 115 |
def generate_model_response(model, inputs, top_k, top_p, temperature):
|
| 116 |
try:
|
| 117 |
response = model.generate(inputs, top_k=top_k, top_p=top_p, temperature=temperature)
|
|
@@ -124,11 +110,12 @@ def generate_model_response(model, inputs, top_k, top_p, temperature):
|
|
| 124 |
async def generate(request: ChatRequest):
|
| 125 |
try:
|
| 126 |
inputs = normalize_input(request.message)
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
|
|
|
| 132 |
unique_responses = remove_repetitive_responses(responses)
|
| 133 |
return unique_responses
|
| 134 |
except Exception as e:
|
|
@@ -152,4 +139,4 @@ def remove_repetitive_responses(responses):
|
|
| 152 |
return unique_responses
|
| 153 |
|
| 154 |
if __name__ == "__main__":
|
| 155 |
-
uvicorn.run(app, host="0.0.0.0", port=
|
|
|
|
| 5 |
import uvicorn
|
| 6 |
import re
|
| 7 |
from dotenv import load_dotenv
|
|
|
|
|
|
|
| 8 |
|
| 9 |
load_dotenv()
|
| 10 |
|
| 11 |
app = FastAPI()
|
| 12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
global_data = {
|
| 14 |
'models': {},
|
| 15 |
'tokens': {
|
|
|
|
| 60 |
def load_all_models(self):
|
| 61 |
if self.loaded:
|
| 62 |
return global_data['models']
|
| 63 |
+
|
| 64 |
+
with ThreadPoolExecutor() as executor:
|
| 65 |
+
futures = [executor.submit(self.load_model, config) for config in model_configs]
|
| 66 |
+
models = []
|
| 67 |
+
for future in as_completed(futures):
|
| 68 |
+
model = future.result()
|
| 69 |
+
if model:
|
| 70 |
+
models.append(model)
|
| 71 |
+
|
| 72 |
+
global_data['models'] = {model['name']: model['model'] for model in models}
|
| 73 |
+
self.loaded = True
|
| 74 |
+
return global_data['models']
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
|
| 76 |
model_manager = ModelManager()
|
| 77 |
model_manager.load_all_models()
|
|
|
|
| 98 |
seen_lines.add(line)
|
| 99 |
return '\n'.join(unique_lines)
|
| 100 |
|
|
|
|
| 101 |
def generate_model_response(model, inputs, top_k, top_p, temperature):
|
| 102 |
try:
|
| 103 |
response = model.generate(inputs, top_k=top_k, top_p=top_p, temperature=temperature)
|
|
|
|
| 110 |
async def generate(request: ChatRequest):
|
| 111 |
try:
|
| 112 |
inputs = normalize_input(request.message)
|
| 113 |
+
with ThreadPoolExecutor() as executor:
|
| 114 |
+
futures = [
|
| 115 |
+
executor.submit(generate_model_response, model, inputs, request.top_k, request.top_p, request.temperature)
|
| 116 |
+
for model in global_data['models'].values()
|
| 117 |
+
]
|
| 118 |
+
responses = [{'model': model_name, 'response': future.result()} for model_name, future in zip(global_data['models'].keys(), as_completed(futures))]
|
| 119 |
unique_responses = remove_repetitive_responses(responses)
|
| 120 |
return unique_responses
|
| 121 |
except Exception as e:
|
|
|
|
| 139 |
return unique_responses
|
| 140 |
|
| 141 |
if __name__ == "__main__":
|
| 142 |
+
uvicorn.run(app, host="0.0.0.0", port=7860)
|