Update app.py
app.py CHANGED
@@ -1,21 +1,24 @@
+import cachetools
 from pydantic import BaseModel
-from …
+from llama_cpp_agent import Llama
 from concurrent.futures import ThreadPoolExecutor, as_completed
 import re
 import httpx
 import asyncio
 import gradio as gr
 import os
-import gptcache
 from dotenv import load_dotenv
 from fastapi import FastAPI, Request
 from fastapi.responses import JSONResponse
 import uvicorn
 from threading import Thread
+import gptcache
 
 load_dotenv()
 HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
 
+cache = cachetools.TTLCache(maxsize=100, ttl=60)
+
 global_data = {
     'models': {},
     'tokens': {
@@ -124,10 +127,10 @@ def remove_duplicates(text):
 def cache_response(func):
     def wrapper(*args, **kwargs):
         cache_key = f"{args}-{kwargs}"
-        if …
-            return …
+        if cache_key in cache:
+            return cache[cache_key]
         response = func(*args, **kwargs)
-        …
+        cache[cache_key] = response
         return response
     return wrapper
 
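For reference, the hunk above caches responses in a cachetools.TTLCache (at most 100 entries, each valid for 60 seconds), keyed on the stringified call arguments. A minimal standalone sketch of the same pattern — the slow_square function is invented for illustration:

import time

import cachetools

# Same configuration as the diff: at most 100 entries, each living 60 seconds.
cache = cachetools.TTLCache(maxsize=100, ttl=60)

def cache_response(func):
    def wrapper(*args, **kwargs):
        cache_key = f"{args}-{kwargs}"   # stringified call signature
        if cache_key in cache:
            return cache[cache_key]      # hit: skip the expensive call
        response = func(*args, **kwargs)
        cache[cache_key] = response      # miss: compute, then store
        return response
    return wrapper

@cache_response
def slow_square(x):                      # invented stand-in for a model call
    time.sleep(1)
    return x * x

slow_square(4)   # first call: computed, takes about a second
slow_square(4)   # repeat within 60 s: answered from the cache

cachetools also ships a ready-made cachetools.cached(cache) decorator that does the same bookkeeping; the hand-rolled wrapper just keeps the lookup explicit.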
@@ -155,13 +158,13 @@ async def process_message(message):
     ]
     responses = [
         {'model': model_name, 'response': future.result()}
-        …
-        …
-        …
-        …
-        …
-        …
-        …
+        for model_name, future in zip(global_data['models'].keys(), as_completed(futures))
+    ]
+    unique_responses = remove_repetitive_responses(responses)
+    formatted_response = ""
+    for model, response in unique_responses.items():
+        formatted_response += f"**{model}:**\n{response}\n\n"
+    return formatted_response
 
 app = FastAPI()
 
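One caveat about the hunk above: as_completed yields futures in completion order, not submission order, so zipping it against global_data['models'].keys() can attach a response to the wrong model name whenever a later-submitted model finishes first. The usual pattern is to map each future back to its name. A sketch under that assumption — generate_with_model is a made-up placeholder for the app's real per-model call:

from concurrent.futures import ThreadPoolExecutor, as_completed

def generate_with_model(model_name, message):
    # Placeholder for the real per-model inference call.
    return f"{model_name} answered: {message}"

def gather_responses(model_names, message):
    with ThreadPoolExecutor() as executor:
        # Remember which future belongs to which model...
        future_to_model = {
            executor.submit(generate_with_model, name, message): name
            for name in model_names
        }
        # ...so completion order cannot mislabel the results.
        return [
            {'model': future_to_model[future], 'response': future.result()}
            for future in as_completed(future_to_model)
        ]

print(gather_responses(['model-a', 'model-b'], 'hello'))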
@@ -175,7 +178,7 @@ async def generate(request: ChatRequest):
 
 def run_uvicorn():
     try:
-        uvicorn.run(app, host="0.0.0.0", port=…
+        uvicorn.run(app, host="0.0.0.0", port=7861)
     except Exception as e:
         print(f"Error running uvicorn: {e}")
 
@@ -184,7 +187,7 @@ iface = gr.Interface(
     inputs=gr.Textbox(lines=2, placeholder="Enter your message here..."),
     outputs=gr.Markdown(),
     title="Multi-Model LLM API (CPU Optimized)",
-    description="…
+    description=""
 )
 
 def run_gradio():
@@ -193,4 +196,4 @@ def run_gradio():
 if __name__ == "__main__":
     Thread(target=run_uvicorn).start()
     Thread(target=run_gradio).start()
-    asyncio.get_event_loop().run_forever()
+    asyncio.get_event_loop().run_forever()
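On the launch pattern in the last hunk: uvicorn.run and Gradio's launch each block, so the commit runs them in separate threads and parks the main thread with asyncio.get_event_loop().run_forever(). On Python 3.10+ that call is deprecated when no event loop is running; joining a worker thread is a simple substitute. A minimal sketch of the same layout, assuming the diff's host and port — the /health route is illustrative only:

from threading import Thread

import uvicorn
from fastapi import FastAPI

app = FastAPI()

@app.get("/health")
def health():  # illustrative route, not from the commit
    return {"status": "ok"}

def run_uvicorn():
    # Same bind address as the diff; uvicorn.run blocks, hence its own thread.
    uvicorn.run(app, host="0.0.0.0", port=7861)

if __name__ == "__main__":
    server = Thread(target=run_uvicorn, daemon=True)
    server.start()
    # Keeps the process alive without asyncio.get_event_loop().run_forever(),
    # which warns on newer Pythons when called with no running event loop.
    server.join()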