Madras1 committed on
Commit
d5dac55
verified
1 Parent(s): d6f2e7c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -17
app.py CHANGED
@@ -4,20 +4,17 @@ import torch
4
  from transformers import AutoModelForCausalLM, AutoTokenizer
5
 
6
  # --- CONFIGURAÇÃO DOS MODELOS ---
7
- # IDs Oficiais do Hugging Face
8
  MODELS = {
9
- "deepseek": "deepseek-ai/deepseek-math-7b-instruct",
10
- "llama3": "meta-llama/Meta-Llama-3-8B-Instruct",
11
- "gemma2": "google/gemma-2-9b-it"
12
  }
13
 
14
  # --- VARIÁVEIS GLOBAIS (CACHE NA VRAM) ---
15
- # Vamos guardar tudo na memória da H200
16
  loaded_models = {}
17
  loaded_tokenizers = {}
18
 
19
  def get_model_and_tokenizer(model_key):
20
- """Carrega o modelo na VRAM apenas se ainda n茫o estiver l谩."""
21
  global loaded_models, loaded_tokenizers
22
 
23
  if model_key not in loaded_models:
@@ -27,7 +24,7 @@ def get_model_and_tokenizer(model_key):
27
  tokenizer = AutoTokenizer.from_pretrained(model_id)
28
  model = AutoModelForCausalLM.from_pretrained(
29
  model_id,
30
- torch_dtype=torch.bfloat16, # BF16 economiza memória e é rápido na H200
31
  device_map="cuda"
32
  )
33
 
@@ -38,9 +35,8 @@ def get_model_and_tokenizer(model_key):
38
  return loaded_models[model_key], loaded_tokenizers[model_key]
39
 
40
  # --- FUNÇÃO DE GERAÇÃO (ZEROGPU) ---
41
- @spaces.GPU(duration=120) # 2 min é seguro para respostas longas de matemática
42
  def generate(message, history, model_selector):
43
- # Identifica qual modelo o usuário quer
44
  if "DeepSeek" in model_selector:
45
  key = "deepseek"
46
  elif "Llama" in model_selector:
@@ -48,19 +44,16 @@ def generate(message, history, model_selector):
48
  elif "Gemma" in model_selector:
49
  key = "gemma2"
50
  else:
51
- key = "deepseek" # Padrão
52
 
53
  model, tokenizer = get_model_and_tokenizer(key)
54
 
55
- # Formata o prompt (Cada modelo tem seu jeito, mas o tokenizer resolve)
56
- # Convertendo histórico para formato de lista de dicts
57
  messages = []
58
  for user_msg, bot_msg in history:
59
  if user_msg: messages.append({"role": "user", "content": user_msg})
60
  if bot_msg: messages.append({"role": "assistant", "content": bot_msg})
61
  messages.append({"role": "user", "content": message})
62
 
63
- # Aplica o template de chat correto para o modelo
64
  text = tokenizer.apply_chat_template(
65
  messages,
66
  tokenize=False,
@@ -69,8 +62,6 @@ def generate(message, history, model_selector):
69
 
70
  inputs = tokenizer([text], return_tensors="pt").to(model.device)
71
 
72
- # Gera a resposta
73
- # Max tokens alto pq matemática exige passo-a-passo
74
  outputs = model.generate(
75
  **inputs,
76
  max_new_tokens=2048,
@@ -81,8 +72,9 @@ def generate(message, history, model_selector):
81
  response = tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
82
  return response
83
 
84
- # --- INTERFACE GRADIO ---
85
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
 
86
  gr.Markdown("# 馃М M贸dulo Matem谩tico & L贸gico (H200)")
87
 
88
  with gr.Row():
 
4
  from transformers import AutoModelForCausalLM, AutoTokenizer
5
 
6
  # --- CONFIGURAÇÃO DOS MODELOS ---
 
7
  MODELS = {
8
+ "deepseek": "deepseek-ai/deepseek-math-7b-instruct",
9
+ "llama3": "meta-llama/Meta-Llama-3-8B-Instruct",
10
+ "gemma2": "google/gemma-2-9b-it"
11
  }
12
 
13
  # --- VARIÁVEIS GLOBAIS (CACHE NA VRAM) ---
 
14
  loaded_models = {}
15
  loaded_tokenizers = {}
16
 
17
  def get_model_and_tokenizer(model_key):
 
18
  global loaded_models, loaded_tokenizers
19
 
20
  if model_key not in loaded_models:
 
24
  tokenizer = AutoTokenizer.from_pretrained(model_id)
25
  model = AutoModelForCausalLM.from_pretrained(
26
  model_id,
27
+ torch_dtype=torch.bfloat16,
28
  device_map="cuda"
29
  )
30
 
 
35
  return loaded_models[model_key], loaded_tokenizers[model_key]
36
 
37
  # --- FUNÇÃO DE GERAÇÃO (ZEROGPU) ---
38
+ @spaces.GPU(duration=120)
39
  def generate(message, history, model_selector):
 
40
  if "DeepSeek" in model_selector:
41
  key = "deepseek"
42
  elif "Llama" in model_selector:
 
44
  elif "Gemma" in model_selector:
45
  key = "gemma2"
46
  else:
47
+ key = "deepseek"
48
 
49
  model, tokenizer = get_model_and_tokenizer(key)
50
 
 
 
51
  messages = []
52
  for user_msg, bot_msg in history:
53
  if user_msg: messages.append({"role": "user", "content": user_msg})
54
  if bot_msg: messages.append({"role": "assistant", "content": bot_msg})
55
  messages.append({"role": "user", "content": message})
56
 
 
57
  text = tokenizer.apply_chat_template(
58
  messages,
59
  tokenize=False,
 
62
 
63
  inputs = tokenizer([text], return_tensors="pt").to(model.device)
64
 
 
 
65
  outputs = model.generate(
66
  **inputs,
67
  max_new_tokens=2048,
 
72
  response = tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
73
  return response
74
 
75
+ # --- INTERFACE GRADIO (SEM TEMA PARA NÃO DAR ERRO) ---
76
+ # Mudei aqui: Tirei o theme=gr.themes.Soft()
77
+ with gr.Blocks() as demo:
78
  gr.Markdown("# 馃М M贸dulo Matem谩tico & L贸gico (H200)")
79
 
80
  with gr.Row():