GuXSs committed
Commit 8e6d932 · verified · 1 Parent(s): b94ff58

Update app.py

Files changed (1)
  1. app.py +169 -186
app.py CHANGED
@@ -1,169 +1,159 @@
  import os
- import uuid
- import json
+ import secrets
+ import html
  import asyncio
  import logging
- import time
- from datetime import datetime, timedelta
- from typing import Dict, List, Optional, Tuple, Any
  from dataclasses import dataclass
+ from typing import Any, Optional, Tuple

  import gradio as gr
- import aiohttp
- from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
- from dotenv import load_dotenv
- from pydantic import BaseModel, ValidationError
- import secrets
- import plotly.graph_objects as go
- from plotly.subplots import make_subplots
+ from transformers import pipeline
+ from pydantic import BaseModel

  # ----------------- Configuration & Models -----------------
- load_dotenv()

  @dataclass
  class Config:
      HF_TOKEN: str = os.getenv("HF_TOKEN", "")
      MODEL_NAME: str = os.getenv("MODEL_NAME", "google/gemma-3-270m-it")
-     MAX_TOKENS: int = int(os.getenv("MAX_TOKENS", "1500"))
+     MAX_TOKENS: int = int(os.getenv("MAX_TOKENS", "2048"))
      LOG_LEVEL: str = os.getenv("LOG_LEVEL", "INFO")

+
  class GenerationRequest(BaseModel):
      prompt: str
-     max_tokens: int = 500
-     temperature: float = 0.75
+     max_tokens: int = 512
+     temperature: float = 0.7
      top_k: int = 50
      top_p: float = 0.95
-     repetition_penalty: float = 1.1
+

  class APIResponse(BaseModel):
      success: bool
      data: Any = None
      error: Optional[str] = None
-     timestamp: datetime = datetime.now()
+

  # ----------------- Enhanced Logger -----------------
+
  def setup_logger():
      logging.basicConfig(
          level=getattr(logging, Config().LOG_LEVEL),
          format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
-         handlers=[
-             logging.FileHandler('gemma_saas.log'),
-             logging.StreamHandler()
-         ]
+         handlers=[logging.FileHandler('gemma_saas.log'), logging.StreamHandler()]
      )
      return logging.getLogger(__name__)

+
  logger = setup_logger()

+
  # ----------------- Model Manager -----------------
  class ModelManager:
      def __init__(self, config: Config):
          self.config = config
-         self.tokenizer = None
-         self.model = None
          self.pipeline = None
          self.model_loaded = False

      async def initialize(self):
-         """Initialize the model, tokenizer, and pipeline asynchronously."""
          if not self.config.HF_TOKEN:
-             logger.error("Hugging Face token not found. Model loading will fail.")
-             self.model_loaded = False
+             logger.error("Token do Hugging Face não encontrado. O carregamento do modelo irá falhar.")
              return
+
          try:
-             logger.info(f"Loading model: {self.config.MODEL_NAME}...")
-             loop = asyncio.get_event_loop()
-
-             def load_components():
-                 tokenizer = AutoTokenizer.from_pretrained(self.config.MODEL_NAME, token=self.config.HF_TOKEN)
-                 model = AutoModelForCausalLM.from_pretrained(
-                     self.config.MODEL_NAME,
-                     token=self.config.HF_TOKEN,
+             logger.info(f"A carregar o modelo: {self.config.MODEL_NAME}...")
+             loop = asyncio.get_running_loop()
+
+             def load_pipeline():
+                 # Use `use_auth_token` (works on many transformers versions; newer releases prefer `token=`)
+                 return pipeline(
+                     task="text-generation",
+                     model=self.config.MODEL_NAME,
                      device_map="auto",
-                     torch_dtype="auto"
-                 )
-                 text_pipeline = pipeline(
-                     "text-generation",
-                     model=model,
-                     tokenizer=tokenizer,
+                     model_kwargs={"torch_dtype": "auto"},
+                     use_auth_token=self.config.HF_TOKEN,
                  )
-                 return tokenizer, model, text_pipeline

-             self.tokenizer, self.model, self.pipeline = await loop.run_in_executor(None, load_components)
+             self.pipeline = await loop.run_in_executor(None, load_pipeline)
              self.model_loaded = True
-             logger.info("✅ Model loaded successfully!")
+             logger.info("✅ Modelo carregado com sucesso!")
          except Exception as e:
-             logger.error(f"❌ Error loading model: {e}")
+             logger.error(f"❌ Erro ao carregar o modelo: {e}")
              self.model_loaded = False

      async def generate(self, request: GenerationRequest) -> Tuple[bool, str, int]:
-         """Generate text based on the provided request."""
-         if not self.model_loaded:
+         if not self.model_loaded or self.pipeline is None:
              return False, "❌ O modelo não está disponível. Por favor, verifique os logs do servidor.", 0
-         try:
-             if not request.prompt.strip():
-                 return False, "⚠️ O prompt não pode estar vazio.", 0
-             if len(request.prompt) > 8000:
-                 return False, "⚠️ O prompt é muito longo (máximo de 8000 caracteres).", 0

-             loop = asyncio.get_event_loop()
-
-             messages = [
-                 {"role": "user", "content": request.prompt.strip()},
-             ]
+         if not request.prompt.strip():
+             return False, "⚠️ O prompt não pode estar vazio.", 0
+
+         try:
+             loop = asyncio.get_running_loop()

              def do_generation():
-                 prompt = self.pipeline.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+                 # For most text-generation models, the prompt is passed directly
+                 prompt_text = request.prompt.strip()
+
                  outputs = self.pipeline(
-                     prompt,
+                     prompt_text,
                      max_new_tokens=min(request.max_tokens, self.config.MAX_TOKENS),
                      do_sample=True,
                      temperature=request.temperature,
                      top_k=request.top_k,
                      top_p=request.top_p,
                  )
-                 return outputs[0]["generated_text"][len(prompt):]

-             generated_text = await loop.run_in_executor(None, do_generation)
-             tokens_used = len(self.tokenizer.encode(generated_text))
+                 # Typical pipeline output is a list of dicts with a 'generated_text' key
+                 generated_text = outputs[0].get("generated_text", "")
+
+                 # Approximate token count (uses the pipeline tokenizer when available)
+                 tokens_used = 0
+                 try:
+                     tokenizer = getattr(self.pipeline, "tokenizer", None)
+                     if tokenizer is not None:
+                         # Avoid counting special tokens
+                         tokens_used = len(tokenizer.encode(generated_text, add_special_tokens=False))
+                     else:
+                         tokens_used = len(generated_text.split())
+                 except Exception:
+                     tokens_used = len(generated_text.split())
+
+                 return generated_text, tokens_used
+
+             generated_text, tokens_used = await loop.run_in_executor(None, do_generation)
              return True, generated_text, tokens_used
+
          except Exception as e:
-             logger.error(f"Generation error: {e}")
+             logger.error(f"Erro na geração: {e}")
              return False, f"❌ A geração falhou: {str(e)}", 0

+
  # ----------------- Service Layer -----------------
  class GemmaService:
      def __init__(self):
          self.config = Config()
          self.model_manager = ModelManager(self.config)
-         self._validate_config()
-
-     def _validate_config(self):
-         """Validate that required environment variables are set."""
-         if not self.config.HF_TOKEN:
-             raise ValueError("Missing required environment variable: HF_TOKEN")

      async def initialize(self):
          await self.model_manager.initialize()

-     async def generate_text(self, prompt: str, **kwargs) -> APIResponse:
-         """Generate text directly."""
+     async def generate_text(self, api_key: str, prompt: str, **kwargs) -> APIResponse:
+         if not api_key or not api_key.startswith("gsk-"):
+             return APIResponse(success=False, error="Chave de API inválida ou ausente.")
+
          try:
              request = GenerationRequest(prompt=prompt, **kwargs)
              success, text, tokens_used = await self.model_manager.generate(request)
-
              if success:
-                 return APIResponse(
-                     success=True,
-                     data={"generated_text": text, "tokens_used": tokens_used}
-                 )
+                 return APIResponse(success=True, data={"generated_text": text, "tokens_used": tokens_used})
              else:
                  return APIResponse(success=False, error=text)
-
          except Exception as e:
-             logger.error(f"Service error during text generation: {e}")
+             logger.error(f"Erro de serviço durante a geração de texto: {e}")
              return APIResponse(success=False, error="Ocorreu um erro interno no serviço.")

+
  # ----------------- Enhanced UI -----------------
  class GradioInterface:
      def __init__(self, service: GemmaService):
@@ -172,139 +162,132 @@ class GradioInterface:
      def create_custom_css(self):
          return """
          :root {
-             --dark-bg: #111111;
-             --panel-bg: #1C1C1C;
-             --border-color: #333333;
-             --text-color: #E0E0E0;
-             --text-light: #A0A0A0;
-             --accent-orange: #FF4500;
-             --accent-orange-hover: #FF6347;
+             --dark-bg: #0a0a0a; --panel-bg: #1a1a1a; --border-color: #333;
+             --text-color: #f0f0f0; --text-light: #a0a0a0; --accent-orange: #FF4500;
+             --accent-orange-hover: #FF6347; --code-bg: #282c34;
          }
-         .gradio-container { background-color: var(--dark-bg) !important; }
-         #main_layout { background-color: transparent; border: none !important; box-shadow: none !important; }
-         #right_panel { background-color: var(--panel-bg); border-left: 1px solid var(--border-color); border-radius: 12px; padding: 2rem !important; }
-         #left_panel { background-color: var(--panel-bg); border-radius: 12px; padding: 1rem !important; display: flex !important; flex-direction: column !important; height: 70vh; }
-         #output_display { flex-grow: 1; overflow-y: auto; padding: 1rem; color: var(--text-color); }
-         #output_display p { margin-bottom: 1rem; line-height: 1.6; }
-         #prompt_row { border-top: 1px solid var(--border-color); padding-top: 1rem; }
-         #prompt_input textarea { background-color: #2C2C2C !important; border-color: var(--border-color) !important; color: var(--text-color) !important; border-radius: 8px !important; }
-         #send_button { background-color: var(--accent-orange); color: white; border: none; border-radius: 50% !important; width: 50px !important; height: 50px !important; min-width: 50px !important; transition: background-color 0.3s ease; }
+         .gradio-container { background: var(--dark-bg) !important; color: var(--text-color); }
+         #main_layout { background: transparent; border: none !important; box-shadow: none !important; gap: 2rem; }
+         #right_panel, #left_panel { background: var(--panel-bg); border: 1px solid var(--border-color); border-radius: 16px; padding: 2rem !important; }
+         #left_panel { display: flex !important; flex-direction: column !important; height: 80vh; }
+         #output_display { flex-grow: 1; overflow-y: auto; padding-right: 1rem; color: var(--text-color); }
+         #output_display p { margin-bottom: 1rem; line-height: 1.7; }
+         #input_area { margin-top: 1rem; }
+         #api_key_input textarea, #prompt_input textarea { background-color: #2C2C2C !important; border-color: var(--border-color) !important; color: var(--text-color) !important; border-radius: 12px !important; }
+         #send_button { background: var(--accent-orange); color: white; border: none; border-radius: 12px !important; transition: background-color 0.3s ease; }
          #send_button:hover { background-color: var(--accent-orange-hover); }
          #generate_button {
-             background: linear-gradient(135deg, var(--accent-orange), var(--accent-orange-hover));
-             color: white !important;
-             font-size: 1.2rem !important;
-             font-weight: bold !important;
-             border: none;
-             border-radius: 12px !important;
-             padding: 1rem !important;
-             box-shadow: 0 4px 15px rgba(255, 69, 0, 0.4);
-             transition: all 0.3s ease;
-         }
-         #generate_button:hover {
-             transform: translateY(-2px);
-             box-shadow: 0 6px 20px rgba(255, 69, 0, 0.6);
+             background: linear-gradient(135deg, var(--accent-orange), var(--accent-orange-hover)); color: white !important;
+             font-size: 1.1rem !important; font-weight: bold !important; border: none; border-radius: 12px !important;
+             padding: 1rem !important; box-shadow: 0 4px 15px rgba(255, 69, 0, 0.4); transition: all 0.3s ease;
          }
-         .gr-label { color: var(--text-light) !important; }
-         h2 { color: white; border-bottom: 1px solid var(--border-color); padding-bottom: 0.5rem; margin-bottom: 1rem; }
-         #info_text { color: var(--text-light); line-height: 1.7; }
+         #generate_button:hover { transform: translateY(-2px); box-shadow: 0 6px 20px rgba(255, 69, 0, 0.6); }
+         h2, h3 { color: white; border-bottom: 1px solid var(--border-color); padding-bottom: 0.75rem; margin-bottom: 1.5rem; font-weight: 600; }
+         .code-snippet { background-color: var(--code-bg); color: #abb2bf; padding: 1.5rem; border-radius: 12px; font-family: 'Courier New', monospace; white-space: pre-wrap; word-wrap: break-word; border: 1px solid var(--border-color); }
+         .code-snippet .keyword { color: #c678dd; } .code-snippet .string { color: #98c379; } .code-snippet .number { color: #d19a66; }
+         .gr-slider { color: var(--text-light); }
          """

-     async def create_interface(self):
+     def create_interface(self):
          with gr.Blocks(css=self.create_custom_css(), theme=None) as app:
              with gr.Row(elem_id="main_layout", equal_height=False):
-                 with gr.Column(scale=2, elem_id="left_panel_container"):
+                 with gr.Column(scale=2):
                      with gr.Column(elem_id="left_panel"):
-                         output_display = gr.Markdown(elem_id="output_display", value="<p style='color: #A0A0A0;'>Sua resposta aparecerá aqui...</p>")
-                         with gr.Row(elem_id="prompt_row"):
-                             prompt_input = gr.Textbox(
-                                 show_label=False,
-                                 placeholder="Digite sua mensagem aqui...",
-                                 elem_id="prompt_input",
-                                 scale=10
-                             )
-                             send_button = gr.Button("➤", elem_id="send_button", scale=1)
-
-                 with gr.Column(scale=1, elem_id="right_panel"):
-                     gr.Markdown("## Informações")
-                     gr.Markdown(
-                         """
-                         Este é um ambiente interativo para o modelo de linguagem **Gemma**.
-
-                         - **Como usar:** Digite seu prompt na caixa de texto à esquerda e clique no botão de envio para gerar uma resposta.
-                         - **Gerar Chave:** Use o botão abaixo para gerar uma chave de API de exemplo.
-                         """,
-                         elem_id="info_text"
-                     )
-                     key_button = gr.Button("✨ Gerar Key", elem_id="generate_button")
-                     key_display = gr.Markdown()
-
-             # --- Event Handlers ---
-             async def handle_generation(prompt):
+                         output_display = gr.Markdown(elem_id="output_display", value="<p style='color: #a0a0a0;'>A sua resposta aparecerá aqui...</p>")
+                         with gr.Column(elem_id="input_area"):
+                             api_key_input = gr.Textbox(label="A Sua Chave de API", placeholder="Cole a sua chave gsk-... aqui", type="password", elem_id="api_key_input")
+                             with gr.Row():
+                                 prompt_input = gr.Textbox(show_label=False, placeholder="Digite a sua mensagem...", elem_id="prompt_input", scale=10)
+                                 send_button = gr.Button("➤ Enviar", elem_id="send_button", scale=2)
+
+                 with gr.Column(scale=1):
+                     with gr.Column(elem_id="right_panel"):
+                         gr.Markdown("## Controlo")
+                         key_button = gr.Button("✨ Gerar Nova Chave", elem_id="generate_button")
+
+                         with gr.Accordion("Parâmetros Avançados", open=False):
+                             temp_slider = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperatura")
+                             max_tokens_slider = gr.Slider(minimum=64, maximum=self.service.config.MAX_TOKENS, value=512, step=64, label="Max Tokens")
+                             top_k_slider = gr.Slider(minimum=1, maximum=100, value=50, step=1, label="Top-K")
+                             top_p_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P")
+
+                         gr.Markdown("### Como Usar a API")
+                         api_example_display = gr.HTML("<p style='color: #a0a0a0;'>Clique em 'Gerar Nova Chave' para ver um exemplo de código.</p>")
+
+             def handle_key_generation():
+                 key = f"gsk-{secrets.token_urlsafe(24).replace('_', '').replace('-', '')}"
+                 code_html = f"""
+                 <div class="code-snippet">
+                     <div><span class="keyword">import</span> requests</div>
+                     <div>&nbsp;</div>
+                     <div>url = <span class="string">"https://SEU_SPACE.hf.space/run/generate"</span></div>
+                     <div>payload = {{</div>
+                     <div>&nbsp;&nbsp;&nbsp;&nbsp;<span class="string">"api_key"</span>: <span class="string">"{key}"</span>,</div>
+                     <div>&nbsp;&nbsp;&nbsp;&nbsp;<span class="string">"prompt"</span>: <span class="string">"Escreva um haikai sobre o universo"</span>,</div>
+                     <div>&nbsp;&nbsp;&nbsp;&nbsp;<span class="string">"max_tokens"</span>: <span class="number">50</span></div>
+                     <div>}}</div>
+                     <div>&nbsp;</div>
+                     <div>response = requests.post(url, json=payload)</div>
+                     <div><span class="keyword">print</span>(response.json())</div>
+                 </div>
+                 """
+                 # gr.update(...) works across Gradio versions; Component.update() was removed in Gradio 4
+                 return gr.update(value=key, interactive=True), gr.update(value=code_html)
+
+             async def handle_generation(api_key, prompt, temp, max_tokens, top_k, top_p):
+                 if not api_key:
+                     yield "<p style='color: #FFCC00;'>Por favor, insira a sua chave de API para começar.</p>", gr.update(value="➤ Enviar", interactive=True)
+                     return
                  if not prompt:
-                     # FIX: Use yield and return to exit the generator correctly
-                     yield "<p style='color: #FFCC00;'>Por favor, digite um prompt para começar.</p>"
+                     yield "<p style='color: #FFCC00;'>Por favor, digite um prompt.</p>", gr.update(value="➤ Enviar", interactive=True)
                      return
-
-                 # Show a loading indicator
-                 yield "<p style='color: #A0A0A0;'>Gerando resposta...</p>"

-                 response = await self.service.generate_text(prompt=prompt)
-
-                 if response.success:
-                     yield response.data["generated_text"]
-                 else:
-                     yield f"<p style='color: #FF4500;'>{response.error}</p>"
+                 yield "<p style='color: #a0a0a0;'>A gerar resposta...</p>", gr.update(value="A gerar...", interactive=False)

-             def handle_key_generation():
-                 """Generates a random API key in the specified format."""
-                 random_part = secrets.token_urlsafe(24).replace("_", "").replace("-", "")
-                 key = f"gsk-{random_part}"
-                 return f"<p style='color: #A0A0A0; text-align: center; margin-top: 1rem;'>Sua chave de exemplo:</p><pre style='background: #2C2C2C; padding: 1rem; border-radius: 8px; text-align: center; word-wrap: break-word;'><code>{key}</code></pre>"
+                 # Call the generation service with the UI parameters
+                 response = await self.service.generate_text(
+                     api_key=api_key,
+                     prompt=prompt,
+                     temperature=float(temp),
+                     max_tokens=int(max_tokens),
+                     top_k=int(top_k),
+                     top_p=float(top_p),
+                 )

-             # --- Wiring ---
-             key_button.click(
-                 handle_key_generation,
-                 inputs=[],
-                 outputs=[key_display]
-             )
+                 if response.success:
+                     formatted_text = html.escape(response.data["generated_text"]).replace("\n", "<br>")
+                     yield formatted_text, gr.update(value="➤ Enviar", interactive=True)
+                 else:
+                     yield f"<p style='color: #FF4500;'>{response.error}</p>", gr.update(value="➤ Enviar", interactive=True)

              send_button.click(
                  handle_generation,
-                 inputs=[prompt_input],
-                 outputs=[output_display]
-             )
-             prompt_input.submit(
-                 handle_generation,
-                 inputs=[prompt_input],
-                 outputs=[output_display]
+                 inputs=[api_key_input, prompt_input, temp_slider, max_tokens_slider, top_k_slider, top_p_slider],
+                 outputs=[output_display, send_button],
+                 api_name="generate",
              )

+             key_button.click(handle_key_generation, outputs=[api_key_input, api_example_display])
+
          return app

+
  # ----------------- Main Application -----------------
- async def main():
-     """Main application entry point"""
+
+ def main():
      try:
          service = GemmaService()
-         await service.initialize()
-
+         # Initialize the model up front (blocking, but required before launching the UI)
+         asyncio.run(service.initialize())
+
          interface = GradioInterface(service)
-         app = await interface.create_interface()
-
-         app.launch(
-             server_name="0.0.0.0",
-             server_port=7860,
-             share=False,
-             debug=False,
-             show_error=True
-         )
+         app = interface.create_interface()
+
+         # Launch the Gradio app (blocks until shutdown)
+         app.launch(server_name="0.0.0.0", server_port=7860, share=False, debug=False, show_error=True)
      except Exception as e:
-         logger.critical(f"Failed to start application: {e}", exc_info=True)
-         raise
+         logger.critical(f"Falha ao iniciar a aplicação: {e}", exc_info=True)

  if __name__ == "__main__":
-     # To run this, you need a .env file with:
-     # HF_TOKEN="your_hugging_face_token"
-     asyncio.run(main())
+     main()
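
Note: because the click handler is registered with api_name="generate", the Space exposes it as a named endpoint that can be called programmatically. Below is a minimal client sketch using gradio_client; it is not part of the commit and rests on assumptions: the Space is public, it runs a recent Gradio release (where predict() on a generator endpoint returns the final yielded value), and "SEU_SPACE" remains the placeholder host from the in-app snippet. Arguments are positional and must follow the inputs= list.

# Sketch only: "SEU_SPACE" is a placeholder, not a real host.
from gradio_client import Client

client = Client("https://SEU_SPACE.hf.space")

result = client.predict(
    "gsk-...",                             # api_key, as produced by "Gerar Nova Chave"
    "Escreva um haikai sobre o universo",  # prompt
    0.7,                                   # temperature
    128,                                   # max_tokens
    50,                                    # top_k
    0.95,                                  # top_p
    api_name="/generate",
)

# Two outputs are wired (output_display, send_button); the generated text is the first.
print(result[0])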
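A caveat on the snippet rendered inside the UI: it POSTs named JSON fields ("api_key", "prompt", "max_tokens") to /run/generate, but Gradio's HTTP API expects a positional "data" list matching the inputs= order, and current releases use a two-step /call/<api_name> flow. The following requests sketch shows that flow under the same assumptions as above (a Gradio 4.x-style /call API and the placeholder host).

# Sketch of Gradio's two-step HTTP flow (assumes a Gradio 4.x-style /call API).
import json
import requests

base = "https://SEU_SPACE.hf.space"  # placeholder host from the in-app snippet
payload = {"data": ["gsk-...", "Escreva um haikai sobre o universo", 0.7, 128, 50, 0.95]}

# Step 1: submit the job; the response carries an event_id.
event_id = requests.post(f"{base}/call/generate", json=payload).json()["event_id"]

# Step 2: stream the server-sent events; each "data:" line is a JSON list of
# outputs, and the last one holds the final result.
outputs = None
with requests.get(f"{base}/call/generate/{event_id}", stream=True) as r:
    for line in r.iter_lines(decode_unicode=True):
        if line and line.startswith("data:"):
            outputs = json.loads(line[len("data:"):])

if outputs:
    print(outputs[0])  # the generated text (first wired output)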