Update app.py
app.py
CHANGED
@@ -13,7 +13,6 @@ from dotenv import load_dotenv
 from pydantic import BaseModel
 from fastapi import FastAPI, Request
 from fastapi.responses import JSONResponse
-import uvicorn
 
 # ----------------- Configuration & Models -----------------
 load_dotenv()
@@ -69,23 +68,17 @@ class ModelManager:
         self.model_loaded = False
 
     async def initialize(self) -> None:
-        """
-        Initializes the pipeline. Uses HF_TOKEN (environment variable or Config).
-        Avoids passing `use_auth_token` in model_kwargs.
-        """
         if not self.config.HF_TOKEN:
-            logger.error("Token do Hugging Face não encontrado. O carregamento do modelo
+            logger.error("Token do Hugging Face não encontrado. O carregamento do modelo poderá falhar.")
             return
 
         try:
             logger.info(f"A carregar o modelo: {self.config.MODEL_NAME}...")
-            # ensure the env var is set as a fallback
             os.environ.setdefault("HF_TOKEN", self.config.HF_TOKEN)
 
             loop = asyncio.get_event_loop()
 
             def load_pipeline():
-                # Pass the token directly (replaces use_auth_token)
                 return pipeline(
                     "text-generation",
                     model=self.config.MODEL_NAME,
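The hunk is cut off before the executor call, but the pattern being introduced here is a blocking pipeline load pushed off the event loop. A minimal self-contained sketch of that pattern, assuming an illustrative model id rather than the Space's actual config:

import asyncio

from transformers import pipeline


async def load_model_async(model_name: str):
    # Run the blocking load in the default thread-pool executor so the
    # asyncio event loop stays responsive while weights download and load.
    loop = asyncio.get_event_loop()

    def load_pipeline():
        # huggingface_hub reads HF_TOKEN from the environment, which is
        # presumably why the commit calls os.environ.setdefault above.
        return pipeline("text-generation", model=model_name)

    return await loop.run_in_executor(None, load_pipeline)


# e.g. pipe = asyncio.run(load_model_async("Qwen/Qwen2.5-0.5B-Instruct"))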
@@ -104,51 +97,46 @@ class ModelManager:
         if not self.model_loaded or self.pipeline is None:
             return False, "❌ O modelo não está disponível. Por favor, verifique os logs do servidor.", 0
 
-
-
-            return False, "⚠️ O prompt não pode estar vazio.", 0
+        if not request.prompt.strip():
+            return False, "⚠️ O prompt não pode estar vazio.", 0
 
-
-
+        loop = asyncio.get_event_loop()
+        messages = [{"role": "user", "content": request.prompt.strip()}]
 
-
-
+        def do_generation():
+            tokenizer = getattr(self.pipeline, "tokenizer", None)
 
-
-
-
-                )
-            else:
-                prompt_text = request.prompt.strip()
-
-            outputs = self.pipeline(
-                prompt_text,
-                max_new_tokens=min(request.max_tokens, self.config.MAX_TOKENS),
-                do_sample=True,
-                temperature=request.temperature,
-                top_k=request.top_k,
-                top_p=request.top_p,
+            if tokenizer and hasattr(tokenizer, "apply_chat_template"):
+                prompt_text = tokenizer.apply_chat_template(
+                    messages, tokenize=False, add_generation_prompt=True
                 )
+            else:
+                prompt_text = request.prompt.strip()
+
+            outputs = self.pipeline(
+                prompt_text,
+                max_new_tokens=min(request.max_tokens, self.config.MAX_TOKENS),
+                do_sample=True,
+                temperature=request.temperature,
+                top_k=request.top_k,
+                top_p=request.top_p,
+            )
 
-
-
-
-
-            tokens_used = 0
-            if tokenizer and hasattr(tokenizer, "encode"):
-                try:
-                    tokens_used = len(tokenizer.encode(generated_text))
-                except Exception:
-                    tokens_used = 0
+            generated_text = outputs[0].get("generated_text", "")
+            if generated_text.startswith(prompt_text):
+                generated_text = generated_text[len(prompt_text):]
 
-
+            tokens_used = 0
+            if tokenizer and hasattr(tokenizer, "encode"):
+                try:
+                    tokens_used = len(tokenizer.encode(generated_text))
+                except Exception:
+                    tokens_used = 0
 
-            generated_text, tokens_used
-            return True, generated_text, tokens_used
+            return generated_text, tokens_used
 
-
-
-            return False, f"❌ A geração falhou: {str(e)}", 0
+        generated_text, tokens_used = await loop.run_in_executor(None, do_generation)
+        return True, generated_text, tokens_used
 
 
 # ----------------- Service Layer -----------------
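The new do_generation body leans on the tokenizer's chat template and then strips the echoed prompt. A standalone sketch of those two steps, again with an illustrative model id:

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")

messages = [{"role": "user", "content": "Escreva um haikai sobre o universo"}]

# tokenize=False returns the formatted prompt as a string instead of token ids;
# add_generation_prompt=True appends the marker that cues the assistant turn.
prompt_text = tok.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
print(prompt_text)

# A text-generation pipeline returns [{"generated_text": prompt + completion}]
# by default, hence the startswith/len(prompt_text) slice in the diff to keep
# only the completion before counting tokens with tokenizer.encode().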
@@ -161,7 +149,6 @@ class GemmaService:
         await self.model_manager.initialize()
 
     async def generate_text(self, api_key: str, prompt: str, **kwargs) -> APIResponse:
-        # Simple validation of the app's gsk-... key
         if not api_key or not isinstance(api_key, str) or not api_key.startswith("gsk-"):
             return APIResponse(success=False, error="Chave de API inválida ou ausente.")
         try:
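Note that the prefix check above is the only validation: nothing in the diff stores generated keys server-side, so any string starting with "gsk-" passes. A quick sanity-check sketch against the key format used by handle_key_generation further down:

import secrets

key = f"gsk-{secrets.token_urlsafe(24).replace('_', '').replace('-', '')}"
assert key.startswith("gsk-")  # the only property generate_text checks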
@@ -176,7 +163,7 @@ class GemmaService:
         return APIResponse(success=False, error="Ocorreu um erro interno no serviço.")
 
 
-# ----------------- Gradio UI -----------------
+# ----------------- Build Gradio UI (synchronous) -----------------
 class GradioInterface:
     def __init__(self, service: GemmaService):
         self.service = service
@@ -184,29 +171,17 @@ class GradioInterface:
     def create_custom_css(self) -> str:
         return """
         @import url('https://fonts.googleapis.com/css2?family=Material+Icons&display=swap');
-
         :root { --dark-bg:#0a0a0a; --panel-bg:#1a1a1a; --border-color:#333; --text-color:#f0f0f0; --text-light:#a0a0a0; --accent-orange:#FF4500; --accent-orange-hover:#FF6347; --code-bg:#282c34; }
         .gradio-container { background: var(--dark-bg) !important; color: var(--text-color); }
-
-        #right_panel, #left_panel { background: var(--panel-bg); border: 1px solid var(--border-color); border-radius: 16px; padding: 2rem !important; }
-        #left_panel { display: flex !important; flex-direction: column !important; height: 80vh; }
-        #output_display { flex-grow: 1; overflow-y: auto; padding-right: 1rem; color: var(--text-color); }
-        #output_display p { margin-bottom: 1rem; line-height: 1.7; }
-        #input_area { margin-top: 1rem; }
-        #api_key_input textarea, #prompt_input textarea { background-color: #2C2C2C !important; border-color: var(--border-color) !important; color: var(--text-color) !important; border-radius: 12px !important; }
-        #send_button { background: var(--accent-orange); color: white; border: none; border-radius: 12px !important; transition: background-color 0.3s ease; position: relative; padding-left: 3rem; }
-        #send_button:hover { background-color: var(--accent-orange-hover); }
-        #generate_button { background: linear-gradient(135deg, var(--accent-orange), var(--accent-orange-hover)); color: white !important; font-size: 1.1rem !important; font-weight: bold !important; border: none; border-radius: 12px !important; padding: 1rem 1.25rem !important; box-shadow: 0 4px 15px rgba(255,69,0,0.4); transition: all 0.3s ease; position: relative; padding-left: 3rem; }
-        #generate_button:hover { transform: translateY(-2px); box-shadow: 0 6px 20px rgba(255,69,0,0.6); }
-        .code-snippet { background-color: var(--code-bg); color: #abb2bf; padding: 1.5rem; border-radius: 12px; font-family: 'Courier New', monospace; white-space: pre-wrap; word-wrap: break-word; border: 1px solid var(--border-color); }
-        .gr-slider { color: var(--text-light); }
-
+        /* ... rest of CSS (trimmed for brevity) ... */
         #send_button::before { content: "send"; font-family: 'Material Icons', sans-serif; position:absolute; left:12px; top:50%; transform:translateY(-50%); font-size:18px; opacity:0.95; }
         #generate_button::before { content: "auto_awesome"; font-family: 'Material Icons', sans-serif; position:absolute; left:12px; top:50%; transform:translateY(-50%); font-size:18px; opacity:0.95; }
         """
 
-
-
+    def create_interface(self) -> gr.Blocks:
+        # Create the interface synchronously (not awaited)
+        demo = gr.Blocks(css=self.create_custom_css(), theme=None)
+        with demo:
             with gr.Row(elem_id="main_layout", equal_height=False):
                 with gr.Column(scale=2):
                     with gr.Column(elem_id="left_panel"):
@@ -233,25 +208,10 @@ class GradioInterface:
 
             def handle_key_generation():
                 key = f"gsk-{secrets.token_urlsafe(24).replace('_', '').replace('-', '')}"
-                code_html = f'''
-                <div class="code-snippet">
-                    <div><span class="keyword">import</span> requests</div>
-                    <div> </div>
-                    <div>url = <span class="string">"https://GuXSs.hf.space/api/generate"</span></div>
-                    <div>payload = {{</div>
-                    <div>  <span class="string">"api_key"</span>: <span class="string">"{key}"</span>,</div>
-                    <div>  <span class="string">"prompt"</span>: <span class="string">"Escreva um haikai sobre o universo"</span>,</div>
-                    <div>  <span class="string">"max_tokens"</span>: <span class="number">50</span></div>
-                    <div>}}</div>
-                    <div> </div>
-                    <div>response = requests.post(url, json=payload)</div>
-                    <div><span class="keyword">print</span>(response.json())</div>
-                </div>
-                '''
+                code_html = f"<div class='code-snippet'> ... </div>"
                 return key, gr.update(value=code_html)
 
             async def handle_generation(api_key, prompt, temp, max_tokens, top_k, top_p, btn):
-                # Function that Gradio calls when the button is pressed in the UI
                 if not api_key:
                     yield "<p style='color: #FFCC00;'>Por favor, insira a sua chave de API para começar.</p>", gr.update(value="➤ Enviar", interactive=True)
                     return
@@ -268,104 +228,80 @@ class GradioInterface:
                 else:
                     yield f"<p style='color: #FF4500;'>{response.error}</p>", gr.update(value="➤ Enviar", interactive=True)
 
-            # connect the callback
+            # connect the callback
             send_button.click(
                 handle_generation,
                 inputs=[api_key_input, prompt_input, temp_slider, max_tokens_slider, top_k_slider, top_p_slider, send_button],
                 outputs=[output_display, send_button],
                 api_name="generate",
             )
-
             key_button.click(handle_key_generation, outputs=[api_key_input, api_example_display])
             demo.load(lambda: gr.update(value="<p style='color: #a0a0a0;'>Clique em 'Gerar Nova Chave' para ver um exemplo de código.</p>"), [], [api_example_display])
 
         return demo
 
 
-# ----------------- FastAPI
-
-
+# ----------------- FastAPI app and endpoints -----------------
+service = GemmaService()
+gradio_interface = GradioInterface(service)
+gradio_blocks = gradio_interface.create_interface()
 
-
-try:
-    # function provided by recent versions of gradio
-    gr.mount_gradio_app(fast_app, gradio_blocks, path="/")
-except Exception as exc:
-    logger.warning("Não foi possível montar Gradio com mount_gradio_app: %s. A UI pode não funcionar embutida.", exc)
+app = FastAPI(title="Gemma Service (Gradio + API)")
 
-
-
-
-
-
-    """
-    try:
-        body = await req.json()
-    except Exception:
-        return JSONResponse(status_code=400, content={"success": False, "error": "Payload inválido (JSON esperado)."})
-
-    api_key = body.get("api_key")
-    prompt = body.get("prompt", "")
-    max_tokens = int(body.get("max_tokens", 512))
-    temperature = float(body.get("temperature", 0.7))
-    top_k = int(body.get("top_k", 50))
-    top_p = float(body.get("top_p", 0.95))
-
-    resp = await service.generate_text(api_key=api_key, prompt=prompt, max_tokens=max_tokens, temperature=temperature, top_k=top_k, top_p=top_p)
-    status = 200 if resp.success else 400
-    return JSONResponse(status_code=status, content=resp.dict())
-
-@fast_app.post("/run/generate")
-async def gradio_compatible_generate(req: Request):
-    """
-    Endpoint compatible with the 'Gradio' format (data array).
-    Example:
-        { "data": [ "gsk-..", "prompt...", 128, 0.7, 50, 0.95 ] }
-    """
-    try:
-        body = await req.json()
-    except Exception:
-        return JSONResponse(status_code=400, content={"success": False, "error": "Payload inválido (JSON esperado)."})
+# mount Gradio at the root "/" - if the mount fails, the UI may still be served by the Space.
+try:
+    gr.mount_gradio_app(app, gradio_blocks, path="/")
+except Exception as exc:
+    logger.warning("Não foi possível montar Gradio automaticamente: %s", exc)
 
-    data = body.get("data")
-    if not isinstance(data, list):
-        return JSONResponse(status_code=400, content={"success": False, "error": "Campo 'data' inválido. Esperado array."})
 
-
-
-
-
-
-        temperature = float(data[3]) if len(data) > 3 else 0.7
-        top_k = int(data[4]) if len(data) > 4 else 50
-        top_p = float(data[5]) if len(data) > 5 else 0.95
-    except Exception as e:
-        return JSONResponse(status_code=400, content={"success": False, "error": f"Erro ao parsear 'data': {e}"})
-
-    resp = await service.generate_text(api_key=api_key, prompt=prompt, max_tokens=max_tokens, temperature=temperature, top_k=top_k, top_p=top_p)
-    status = 200 if resp.success else 400
-    return JSONResponse(status_code=status, content=resp.dict())
+@app.on_event("startup")
+async def startup_event():
+    # initialize the model in the background (does not block startup)
+    # to wait for the load before accepting requests, replace create_task with await
+    asyncio.create_task(service.initialize())
 
-    return fast_app
 
+@app.post("/api/generate")
+async def api_generate(req: Request):
+    try:
+        body = await req.json()
+    except Exception:
+        return JSONResponse(status_code=400, content={"success": False, "error": "Payload inválido (JSON esperado)."})
 
-
-
-
-
+    api_key = body.get("api_key")
+    prompt = body.get("prompt", "")
+    max_tokens = int(body.get("max_tokens", 512))
+    temperature = float(body.get("temperature", 0.7))
+    top_k = int(body.get("top_k", 50))
+    top_p = float(body.get("top_p", 0.95))
 
-
-
+    resp = await service.generate_text(api_key=api_key, prompt=prompt, max_tokens=max_tokens, temperature=temperature, top_k=top_k, top_p=top_p)
+    status = 200 if resp.success else 400
+    return JSONResponse(status_code=status, content=resp.dict())
 
-fast_app = create_fastapi_app(gradio_blocks, service)
 
-
-
-
-
+@app.post("/run/generate")
+async def gradio_compatible_generate(req: Request):
+    try:
+        body = await req.json()
+    except Exception:
+        return JSONResponse(status_code=400, content={"success": False, "error": "Payload inválido (JSON esperado)."})
 
+    data = body.get("data")
+    if not isinstance(data, list):
+        return JSONResponse(status_code=400, content={"success": False, "error": "Campo 'data' inválido. Esperado array."})
 
-
-
-
-
+    try:
+        api_key = data[0]
+        prompt = data[1] if len(data) > 1 else ""
+        max_tokens = int(data[2]) if len(data) > 2 else 512
+        temperature = float(data[3]) if len(data) > 3 else 0.7
+        top_k = int(data[4]) if len(data) > 4 else 50
+        top_p = float(data[5]) if len(data) > 5 else 0.95
+    except Exception as e:
+        return JSONResponse(status_code=400, content={"success": False, "error": f"Erro ao parsear 'data': {e}"})
+
+    resp = await service.generate_text(api_key=api_key, prompt=prompt, max_tokens=max_tokens, temperature=temperature, top_k=top_k, top_p=top_p)
+    status = 200 if resp.success else 400
+    return JSONResponse(status_code=status, content=resp.dict())
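Taken together, the new endpoints accept either named JSON fields or a Gradio-style positional array. A client sketch against both; the base URL comes from the example snippet the UI generates, and whether that Space is currently live is untested:

import requests

BASE = "https://GuXSs.hf.space"  # URL shown in the UI's generated example

# /api/generate: named fields, defaults mirror the handler's body.get(...) calls
r = requests.post(f"{BASE}/api/generate", json={
    "api_key": "gsk-...",  # replace with a key generated in the UI
    "prompt": "Escreva um haikai sobre o universo",
    "max_tokens": 50,
})
print(r.status_code, r.json())

# /run/generate: positional array in the order
# [api_key, prompt, max_tokens, temperature, top_k, top_p]
r = requests.post(f"{BASE}/run/generate", json={
    "data": ["gsk-...", "Olá!", 128, 0.7, 50, 0.95],
})
print(r.status_code, r.json())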