GuXSs committed on
Commit
684060d
·
verified ·
1 Parent(s): 8e6d932

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +145 -74
app.py CHANGED
@@ -1,16 +1,21 @@
1
  import os
2
  import secrets
3
- import html
4
- import asyncio
5
  import logging
 
 
 
6
  from dataclasses import dataclass
 
7
  from typing import Any, Optional, Tuple
8
 
9
  import gradio as gr
10
  from transformers import pipeline
 
11
  from pydantic import BaseModel
12
 
13
  # ----------------- Configuration & Models -----------------
 
 
14
 
15
  @dataclass
16
  class Config:
@@ -35,14 +40,29 @@ class APIResponse(BaseModel):
35
 
36
 
37
  # ----------------- Enhanced Logger -----------------
 
 
 
 
 
 
 
 
38
 
39
- def setup_logger():
40
- logging.basicConfig(
41
- level=getattr(logging, Config().LOG_LEVEL),
42
- format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
43
- handlers=[logging.FileHandler('gemma_saas.log'), logging.StreamHandler()]
44
- )
45
- return logging.getLogger(__name__)
 
 
 
 
 
 
 
46
 
47
 
48
  logger = setup_logger()
@@ -55,22 +75,26 @@ class ModelManager:
55
  self.pipeline = None
56
  self.model_loaded = False
57
 
58
- async def initialize(self):
59
  if not self.config.HF_TOKEN:
60
- logger.error("Token do Hugging Face não encontrado. O carregamento do modelo irá falhar.")
 
 
61
  return
62
 
63
  try:
64
  logger.info(f"A carregar o modelo: {self.config.MODEL_NAME}...")
65
- loop = asyncio.get_running_loop()
 
66
 
67
  def load_pipeline():
68
- # Use `use_auth_token` (aplicável em muitas versões do transformers)
 
69
  return pipeline(
70
- task="text-generation",
71
  model=self.config.MODEL_NAME,
72
- device_map="auto",
73
  model_kwargs={"torch_dtype": "auto"},
 
74
  use_auth_token=self.config.HF_TOKEN,
75
  )
76
 
@@ -78,22 +102,34 @@ class ModelManager:
78
  self.model_loaded = True
79
  logger.info("✅ Modelo carregado com sucesso!")
80
  except Exception as e:
81
- logger.error(f"❌ Erro ao carregar o modelo: {e}")
82
- self.model_loaded = False
83
 
84
  async def generate(self, request: GenerationRequest) -> Tuple[bool, str, int]:
85
  if not self.model_loaded or self.pipeline is None:
86
- return False, "❌ O modelo não está disponível. Por favor, verifique os logs do servidor.", 0
87
-
88
- if not request.prompt.strip():
89
- return False, "⚠️ O prompt não pode estar vazio.", 0
 
90
 
91
  try:
92
- loop = asyncio.get_running_loop()
 
 
 
 
93
 
94
  def do_generation():
95
- # Para a maioria dos modelos de geração textual, passamos o prompt diretamente
96
- prompt_text = request.prompt.strip()
 
 
 
 
 
 
 
 
97
 
98
  outputs = self.pipeline(
99
  prompt_text,
@@ -104,20 +140,17 @@ class ModelManager:
104
  top_p=request.top_p,
105
  )
106
 
107
- # A saída típica é uma lista com dicionários contendo 'generated_text'
108
  generated_text = outputs[0].get("generated_text", "")
 
 
 
109
 
110
- # Contagem aproximada de tokens (usa o tokenizer do pipeline se disponível)
111
  tokens_used = 0
112
- try:
113
- tokenizer = getattr(self.pipeline, "tokenizer", None)
114
- if tokenizer is not None:
115
- # Evitar adicionar special tokens na contagem
116
- tokens_used = len(tokenizer.encode(generated_text, add_special_tokens=False))
117
- else:
118
- tokens_used = len(generated_text.split())
119
- except Exception:
120
- tokens_used = len(generated_text.split())
121
 
122
  return generated_text, tokens_used
123
 
@@ -125,7 +158,7 @@ class ModelManager:
125
  return True, generated_text, tokens_used
126
 
127
  except Exception as e:
128
- logger.error(f"Erro na geração: {e}")
129
  return False, f"❌ A geração falhou: {str(e)}", 0
130
 
131
 
@@ -141,16 +174,17 @@ class GemmaService:
141
  async def generate_text(self, api_key: str, prompt: str, **kwargs) -> APIResponse:
142
  if not api_key or not api_key.startswith("gsk-"):
143
  return APIResponse(success=False, error="Chave de API inválida ou ausente.")
144
-
145
  try:
146
  request = GenerationRequest(prompt=prompt, **kwargs)
147
  success, text, tokens_used = await self.model_manager.generate(request)
148
  if success:
149
- return APIResponse(success=True, data={"generated_text": text, "tokens_used": tokens_used})
 
 
150
  else:
151
  return APIResponse(success=False, error=text)
152
  except Exception as e:
153
- logger.error(f"Erro de serviço durante a geração de texto: {e}")
154
  return APIResponse(success=False, error="Ocorreu um erro interno no serviço.")
155
 
156
 
@@ -159,7 +193,7 @@ class GradioInterface:
159
  def __init__(self, service: GemmaService):
160
  self.service = service
161
 
162
- def create_custom_css(self):
163
  return """
164
  :root {
165
  --dark-bg: #0a0a0a; --panel-bg: #1a1a1a; --border-color: #333;
@@ -188,16 +222,29 @@ class GradioInterface:
188
  .gr-slider { color: var(--text-light); }
189
  """
190
 
191
- def create_interface(self):
192
  with gr.Blocks(css=self.create_custom_css(), theme=None) as app:
193
  with gr.Row(elem_id="main_layout", equal_height=False):
194
  with gr.Column(scale=2):
195
  with gr.Column(elem_id="left_panel"):
196
- output_display = gr.Markdown(elem_id="output_display", value="<p style='color: #a0a0a0;'>A sua resposta aparecerá aqui...</p>")
 
 
 
197
  with gr.Column(elem_id="input_area"):
198
- api_key_input = gr.Textbox(label="A Sua Chave de API", placeholder="Cole a sua chave gsk-... aqui", type="password", elem_id="api_key_input")
 
 
 
 
 
199
  with gr.Row():
200
- prompt_input = gr.Textbox(show_label=False, placeholder="Digite a sua mensagem...", elem_id="prompt_input", scale=10)
 
 
 
 
 
201
  send_button = gr.Button("➤ Enviar", elem_id="send_button", scale=2)
202
 
203
  with gr.Column(scale=1):
@@ -206,60 +253,79 @@ class GradioInterface:
206
  key_button = gr.Button("✨ Gerar Nova Chave", elem_id="generate_button")
207
 
208
  with gr.Accordion("Parâmetros Avançados", open=False):
209
- temp_slider = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperatura")
210
- max_tokens_slider = gr.Slider(minimum=64, maximum=self.service.config.MAX_TOKENS, value=512, step=64, label="Max Tokens")
211
- top_k_slider = gr.Slider(minimum=1, maximum=100, value=50, step=1, label="Top-K")
212
- top_p_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P")
 
 
 
 
 
 
 
 
213
 
214
  gr.Markdown("### Como Usar a API")
215
- api_example_display = gr.HTML("<p style='color: #a0a0a0;'>Clique em 'Gerar Nova Chave' para ver um exemplo de código.</p>")
 
 
216
 
217
  def handle_key_generation():
 
218
  key = f"gsk-{secrets.token_urlsafe(24).replace('_', '').replace('-', '')}"
219
  code_html = f"""
220
- <div class=\"code-snippet\">
221
- <div><span class=\"keyword\">import</span> requests</div>
222
  <div>&nbsp;</div>
223
- <div>url = <span class=\"string\">\"https://SEU_SPACE.hf.space/run/generate\"</span></div>
224
  <div>payload = {{</div>
225
- <div>&nbsp;&nbsp;&nbsp;&nbsp;<span class=\"string\">\"api_key\"</span>: <span class=\"string\">\"{key}\"</span>,</div>
226
- <div>&nbsp;&nbsp;&nbsp;&nbsp;<span class=\"string\">\"prompt\"</span>: <span class=\"string\">\"Escreva um haikai sobre o universo\"</span>,</div>
227
- <div>&nbsp;&nbsp;&nbsp;&nbsp;<span class=\"string\">\"max_tokens\"</span>: <span class=\"number\">50</span></div>
228
  <div>}}</div>
229
  <div>&nbsp;</div>
230
  <div>response = requests.post(url, json=payload)</div>
231
- <div><span class=\"keyword\">print</span>(response.json())</div>
232
  </div>
233
  """
234
-
235
- return gr.Textbox.update(value=key, interactive=True), api_example_display.update(value=code_html)
236
 
237
  async def handle_generation(api_key, prompt, temp, max_tokens, top_k, top_p, btn):
 
238
  if not api_key:
239
- yield "<p style='color: #FFCC00;'>Por favor, insira a sua chave de API para começar.</p>", gr.Button.update(value="➤ Enviar", interactive=True)
 
 
 
240
  return
 
241
  if not prompt:
242
- yield "<p style='color: #FFCC00;'>Por favor, digite um prompt.</p>", gr.Button.update(value="➤ Enviar", interactive=True)
 
 
 
243
  return
244
 
245
- yield "<p style='color: #a0a0a0;'>A gerar resposta...</p>", gr.Button.update(value="A gerar...", interactive=False)
 
246
 
247
- # chama o serviço de geração
248
  response = await self.service.generate_text(
249
  api_key=api_key,
250
  prompt=prompt,
251
- temperature=float(temp),
252
  max_tokens=int(max_tokens),
253
  top_k=int(top_k),
254
- top_p=float(top_p),
255
  )
256
 
257
  if response.success:
258
  formatted_text = html.escape(response.data["generated_text"]).replace("\n", "<br>")
259
- yield formatted_text, gr.Button.update(value="➤ Enviar", interactive=True)
260
  else:
261
- yield f"<p style='color: #FF4500;'>{response.error}</p>", gr.Button.update(value="➤ Enviar", interactive=True)
262
 
 
263
  send_button.click(
264
  handle_generation,
265
  inputs=[api_key_input, prompt_input, temp_slider, max_tokens_slider, top_k_slider, top_p_slider, send_button],
@@ -269,25 +335,30 @@ class GradioInterface:
269
 
270
  key_button.click(handle_key_generation, outputs=[api_key_input, api_example_display])
271
 
 
 
 
 
 
 
 
272
  return app
273
 
274
 
275
  # ----------------- Main Application -----------------
276
-
277
- def main():
278
  try:
279
  service = GemmaService()
280
- # inicializa o modelo (bloqueante, mas necessário antes de lançar a UI)
281
- asyncio.run(service.initialize())
282
 
283
  interface = GradioInterface(service)
284
- app = interface.create_interface()
285
 
286
- # Lança a aplicação Gradio (bloqueia até terminar)
287
- app.launch(server_name="0.0.0.0", server_port=7860, share=False, debug=False, show_error=True)
288
  except Exception as e:
289
  logger.critical(f"Falha ao iniciar a aplicação: {e}", exc_info=True)
290
 
291
 
292
  if __name__ == "__main__":
293
- main()
 
1
  import os
2
  import secrets
 
 
3
  import logging
4
+ import asyncio
5
+ import html
6
+
7
  from dataclasses import dataclass
8
+ from datetime import datetime
9
  from typing import Any, Optional, Tuple
10
 
11
  import gradio as gr
12
  from transformers import pipeline
13
+ from dotenv import load_dotenv
14
  from pydantic import BaseModel
15
 
16
  # ----------------- Configuration & Models -----------------
17
+ load_dotenv()
18
+
19
 
20
  @dataclass
21
  class Config:
 
40
 
41
 
42
  # ----------------- Enhanced Logger -----------------
43
def setup_logger() -> logging.Logger:
    """Return the shared ``gemma_saas`` logger, configuring it on first use.

    The level name is read from ``Config().LOG_LEVEL`` and resolved
    case-insensitively against the ``logging`` module; anything that does not
    resolve to an integer level falls back to ``logging.INFO``. On the first
    call a file handler (``gemma_saas.log``) and a stream handler are attached
    with a common format; later calls return the already-configured logger
    without adding handlers again.

    Returns:
        The ``logging.Logger`` registered under the name ``"gemma_saas"``.
    """
    cfg = Config()

    # getattr() can land on a non-level attribute of the logging module (for
    # example the string constant BASIC_FORMAT), which setLevel() would
    # reject, so only integer results are accepted. str() keeps a non-string
    # LOG_LEVEL from raising before the fallback can apply.
    log_level = getattr(logging, str(cfg.LOG_LEVEL).upper(), logging.INFO)
    if not isinstance(log_level, int):
        log_level = logging.INFO

    logger = logging.getLogger("gemma_saas")

    # Avoid adding duplicate handlers if called multiple times.
    if not logger.handlers:
        logger.setLevel(log_level)

        formatter = logging.Formatter(
            "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
        )

        # The application logs accented text and emoji; pin UTF-8 so the log
        # file does not depend on the platform's default encoding.
        file_handler = logging.FileHandler("gemma_saas.log", encoding="utf-8")
        file_handler.setFormatter(formatter)

        stream_handler = logging.StreamHandler()
        stream_handler.setFormatter(formatter)

        logger.addHandler(file_handler)
        logger.addHandler(stream_handler)

    return logger
66
 
67
 
68
  logger = setup_logger()
 
75
  self.pipeline = None
76
  self.model_loaded = False
77
 
78
+ async def initialize(self) -> None:
79
  if not self.config.HF_TOKEN:
80
+ logger.error(
81
+ "Token do Hugging Face não encontrado. O carregamento do modelo irá falhar."
82
+ )
83
  return
84
 
85
  try:
86
  logger.info(f"A carregar o modelo: {self.config.MODEL_NAME}...")
87
+
88
+ loop = asyncio.get_event_loop()
89
 
90
  def load_pipeline():
91
+ # Nota: dependendo da versão do transformers/huggingface-hub,
92
+ # o parâmetro pode ser use_auth_token ou token. Ajuste se necessário.
93
  return pipeline(
94
+ "text-generation",
95
  model=self.config.MODEL_NAME,
 
96
  model_kwargs={"torch_dtype": "auto"},
97
+ device_map="auto",
98
  use_auth_token=self.config.HF_TOKEN,
99
  )
100
 
 
102
  self.model_loaded = True
103
  logger.info("✅ Modelo carregado com sucesso!")
104
  except Exception as e:
105
+ logger.error(f"❌ Erro ao carregar o modelo: {e}", exc_info=True)
 
106
 
107
  async def generate(self, request: GenerationRequest) -> Tuple[bool, str, int]:
108
  if not self.model_loaded or self.pipeline is None:
109
+ return (
110
+ False,
111
+ "❌ O modelo não está disponível. Por favor, verifique os logs do servidor.",
112
+ 0,
113
+ )
114
 
115
  try:
116
+ if not request.prompt.strip():
117
+ return False, "⚠️ O prompt não pode estar vazio.", 0
118
+
119
+ loop = asyncio.get_event_loop()
120
+ messages = [{"role": "user", "content": request.prompt.strip()}]
121
 
122
  def do_generation():
123
+ # A pipeline que usava apply_chat_template é específica de alguns tokenizers;
124
+ # mantemos o uso mas com fallback simples se não existir.
125
+ tokenizer = getattr(self.pipeline, "tokenizer", None)
126
+ if tokenizer and hasattr(tokenizer, "apply_chat_template"):
127
+ prompt_text = tokenizer.apply_chat_template(
128
+ messages, tokenize=False, add_generation_prompt=True
129
+ )
130
+ else:
131
+ # Fallback simples
132
+ prompt_text = request.prompt.strip()
133
 
134
  outputs = self.pipeline(
135
  prompt_text,
 
140
  top_p=request.top_p,
141
  )
142
 
 
143
  generated_text = outputs[0].get("generated_text", "")
144
+ # Se usamos prompt_text, retirar a parte inicial que corresponde ao prompt
145
+ if generated_text.startswith(prompt_text):
146
+ generated_text = generated_text[len(prompt_text) :]
147
 
 
148
  tokens_used = 0
149
+ if tokenizer and hasattr(tokenizer, "encode"):
150
+ try:
151
+ tokens_used = len(tokenizer.encode(generated_text))
152
+ except Exception:
153
+ tokens_used = 0
 
 
 
 
154
 
155
  return generated_text, tokens_used
156
 
 
158
  return True, generated_text, tokens_used
159
 
160
  except Exception as e:
161
+ logger.error(f"Erro na geração: {e}", exc_info=True)
162
  return False, f"❌ A geração falhou: {str(e)}", 0
163
 
164
 
 
174
  async def generate_text(self, api_key: str, prompt: str, **kwargs) -> APIResponse:
175
  if not api_key or not api_key.startswith("gsk-"):
176
  return APIResponse(success=False, error="Chave de API inválida ou ausente.")
 
177
  try:
178
  request = GenerationRequest(prompt=prompt, **kwargs)
179
  success, text, tokens_used = await self.model_manager.generate(request)
180
  if success:
181
+ return APIResponse(
182
+ success=True, data={"generated_text": text, "tokens_used": tokens_used}
183
+ )
184
  else:
185
  return APIResponse(success=False, error=text)
186
  except Exception as e:
187
+ logger.error(f"Erro de serviço durante a geração de texto: {e}", exc_info=True)
188
  return APIResponse(success=False, error="Ocorreu um erro interno no serviço.")
189
 
190
 
 
193
  def __init__(self, service: GemmaService):
194
  self.service = service
195
 
196
+ def create_custom_css(self) -> str:
197
  return """
198
  :root {
199
  --dark-bg: #0a0a0a; --panel-bg: #1a1a1a; --border-color: #333;
 
222
  .gr-slider { color: var(--text-light); }
223
  """
224
 
225
+ async def create_interface(self) -> gr.Blocks:
226
  with gr.Blocks(css=self.create_custom_css(), theme=None) as app:
227
  with gr.Row(elem_id="main_layout", equal_height=False):
228
  with gr.Column(scale=2):
229
  with gr.Column(elem_id="left_panel"):
230
+ output_display = gr.Markdown(
231
+ elem_id="output_display",
232
+ value="<p style='color: #a0a0a0;'>A sua resposta aparecerá aqui...</p>",
233
+ )
234
  with gr.Column(elem_id="input_area"):
235
+ api_key_input = gr.Textbox(
236
+ label="A Sua Chave de API",
237
+ placeholder="Cole a sua chave gsk-... aqui",
238
+ type="password",
239
+ elem_id="api_key_input",
240
+ )
241
  with gr.Row():
242
+ prompt_input = gr.Textbox(
243
+ show_label=False,
244
+ placeholder="Digite a sua mensagem...",
245
+ elem_id="prompt_input",
246
+ scale=10,
247
+ )
248
  send_button = gr.Button("➤ Enviar", elem_id="send_button", scale=2)
249
 
250
  with gr.Column(scale=1):
 
253
  key_button = gr.Button("✨ Gerar Nova Chave", elem_id="generate_button")
254
 
255
  with gr.Accordion("Parâmetros Avançados", open=False):
256
+ temp_slider = gr.Slider(
257
+ minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperatura"
258
+ )
259
+ max_tokens_slider = gr.Slider(
260
+ minimum=64, maximum=self.service.config.MAX_TOKENS, value=512, step=64, label="Max Tokens"
261
+ )
262
+ top_k_slider = gr.Slider(
263
+ minimum=1, maximum=100, value=50, step=1, label="Top-K"
264
+ )
265
+ top_p_slider = gr.Slider(
266
+ minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"
267
+ )
268
 
269
  gr.Markdown("### Como Usar a API")
270
+ api_example_display = gr.HTML(
271
+ "<p style='color: #a0a0a0;'>Clique em 'Gerar Nova Chave' para ver um exemplo de código.</p>"
272
+ )
273
 
274
  def handle_key_generation():
275
+ # Gera chave e mostra exemplo de payload
276
  key = f"gsk-{secrets.token_urlsafe(24).replace('_', '').replace('-', '')}"
277
  code_html = f"""
278
+ <div class="code-snippet">
279
+ <div><span class="keyword">import</span> requests</div>
280
  <div>&nbsp;</div>
281
+ <div>url = <span class="string">"https://SEU_SPACE.hf.space/run/generate"</span></div>
282
  <div>payload = {{</div>
283
+ <div>&nbsp;&nbsp;&nbsp;&nbsp;<span class="string">"api_key"</span>: <span class="string">"{key}"</span>,</div>
284
+ <div>&nbsp;&nbsp;&nbsp;&nbsp;<span class="string">"prompt"</span>: <span class="string">"Escreva um haikai sobre o universo"</span>,</div>
285
+ <div>&nbsp;&nbsp;&nbsp;&nbsp;<span class="string">"max_tokens"</span>: <span class="number">50</span></div>
286
  <div>}}</div>
287
  <div>&nbsp;</div>
288
  <div>response = requests.post(url, json=payload)</div>
289
+ <div><span class="keyword">print</span>(response.json())</div>
290
  </div>
291
  """
292
+ return key, gr.update(value=code_html)
 
293
 
294
  async def handle_generation(api_key, prompt, temp, max_tokens, top_k, top_p, btn):
295
+ # Validações básicas
296
  if not api_key:
297
+ yield (
298
+ "<p style='color: #FFCC00;'>Por favor, insira a sua chave de API para começar.</p>",
299
+ gr.update(value="➤ Enviar", interactive=True),
300
+ )
301
  return
302
+
303
  if not prompt:
304
+ yield (
305
+ "<p style='color: #FFCC00;'>Por favor, digite um prompt.</p>",
306
+ gr.update(value="➤ Enviar", interactive=True),
307
+ )
308
  return
309
 
310
+ # Indicador de carregamento
311
+ yield "<p style='color: #a0a0a0;'>A gerar resposta...</p>", gr.update(value="A gerar...", interactive=False)
312
 
 
313
  response = await self.service.generate_text(
314
  api_key=api_key,
315
  prompt=prompt,
316
+ temperature=temp,
317
  max_tokens=int(max_tokens),
318
  top_k=int(top_k),
319
+ top_p=top_p,
320
  )
321
 
322
  if response.success:
323
  formatted_text = html.escape(response.data["generated_text"]).replace("\n", "<br>")
324
+ yield formatted_text, gr.update(value="➤ Enviar", interactive=True)
325
  else:
326
+ yield f"<p style='color: #FF4500;'>{response.error}</p>", gr.update(value="➤ Enviar", interactive=True)
327
 
328
+ # Registar handlers
329
  send_button.click(
330
  handle_generation,
331
  inputs=[api_key_input, prompt_input, temp_slider, max_tokens_slider, top_k_slider, top_p_slider, send_button],
 
335
 
336
  key_button.click(handle_key_generation, outputs=[api_key_input, api_example_display])
337
 
338
+ # Carregar exemplo inicial
339
+ app.load(
340
+ lambda: gr.update(value="<p style='color: #a0a0a0;'>Clique em 'Gerar Nova Chave' para ver um exemplo de código.</p>"),
341
+ [], # sem inputs
342
+ [api_example_display],
343
+ )
344
+
345
  return app
346
 
347
 
348
  # ----------------- Main Application -----------------
349
async def main():
    """Start the application: load the model, build the UI and launch it.

    Creates a ``GemmaService`` and awaits its initialization, builds the
    Gradio interface around it and launches it on ``0.0.0.0:7860`` with
    sharing and debug disabled. Any exception raised along the way is logged
    at CRITICAL level with its traceback and is not re-raised.
    """
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "debug": False,
    }

    try:
        gemma_service = GemmaService()
        await gemma_service.initialize()

        ui = GradioInterface(gemma_service)
        blocks = await ui.create_interface()

        # Start the Gradio interface
        blocks.launch(**launch_options)
    except Exception as exc:
        logger.critical(f"Falha ao iniciar a aplicação: {exc}", exc_info=True)


if __name__ == "__main__":
    asyncio.run(main())