rafavidal1709 committed on
Commit
22c7a6d
·
verified ·
1 Parent(s): b070625

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -60
app.py CHANGED
@@ -5,78 +5,38 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
5
  # Configurações do modelo DeepSeek-R1
6
  MODEL_NAME = "deepseek-ai/DeepSeek-R1"
7
 
8
- # Carregar tokenizer e modelo; Configurar para CPU
9
  tokenizer = AutoTokenizer.from_pretrained(
10
  MODEL_NAME,
11
- trust_remote_code=True
 
12
  )
13
 
14
  model = AutoModelForCausalLM.from_pretrained(
15
  MODEL_NAME,
16
  trust_remote_code=True,
17
  torch_dtype=torch.float32,
18
- device_map="auto",
19
- low_cpu_mem_usage=True
 
 
20
  )
21
 
22
- # Configurações de comprimento
23
- TARGET_LENGTH = 256
24
- MARGIN = 6
25
- MIN_LENGTH = TARGET_LENGTH - MARGIN
26
- MAX_LENGTH = TARGET_LENGTH + MARGIN
27
- MAX_ATTEMPTS = 5
28
-
29
  def summarize_text(text):
30
- """
31
- Gera resumo adaptado para o DeepSeek-R1 com ajuste de comprimento
32
- """
33
- best_summary = ""
34
- best_distance = float("inf")
35
- adjusted_max_tokens = 512 # Valor inicial ajustável
36
-
37
- for attempt in range(MAX_ATTEMPTS):
38
- # Formatar prompt para sumarização
39
- prompt = f"Resuma o seguinte texto em português com cerca de {TARGET_LENGTH} caracteres:\n{text}\nResumo:"
40
-
41
- inputs = tokenizer.encode(
42
- prompt,
43
- return_tensors="pt",
44
- max_length=4096, # Ajustar conforme capacidade do modelo
45
- truncation=True
46
  )
47
-
48
- # Gerar sumário
49
- summary_ids = model.generate(
50
- inputs,
51
- max_new_tokens=adjusted_max_tokens,
52
- num_beams=5,
53
- repetition_penalty=1.2,
54
- early_stopping=True,
55
- temperature=0.7,
56
- top_p=0.9
57
- )
58
-
59
- summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
60
-
61
- # Filtrar apenas o resumo gerado (remover prompt)
62
- if "Resumo:" in summary:
63
- summary = summary.split("Resumo:")[-1].strip()
64
-
65
- summary_length = len(summary)
66
- distance = abs(TARGET_LENGTH - summary_length)
67
-
68
- if distance < best_distance:
69
- best_summary = summary
70
- best_distance = distance
71
-
72
- if MIN_LENGTH <= summary_length <= MAX_LENGTH:
73
- return summary[:MAX_LENGTH] # Garantir limite máximo
74
-
75
- # Ajuste adaptativo
76
- adjustment = int((summary_length / TARGET_LENGTH) * adjusted_max_tokens)
77
- adjusted_max_tokens = max(32, adjusted_max_tokens - adjustment)
78
-
79
- return best_summary[:MAX_LENGTH]
80
 
81
  # Interface Gradio
82
  interface = gr.Interface(
 
5
  # Configurações do modelo DeepSeek-R1
6
  MODEL_NAME = "deepseek-ai/DeepSeek-R1"
7
 
8
+ # Configuração segura para CPU
9
  tokenizer = AutoTokenizer.from_pretrained(
10
  MODEL_NAME,
11
+ trust_remote_code=True,
12
+ revision="6528ae3" # Fixar versão específica
13
  )
14
 
15
  model = AutoModelForCausalLM.from_pretrained(
16
  MODEL_NAME,
17
  trust_remote_code=True,
18
  torch_dtype=torch.float32,
19
+ device_map="cpu",
20
+ low_cpu_mem_usage=True,
21
+ load_in_8bit=False,
22
+ offload_folder="offload" # Pasta para descarregar pesos grandes
23
  )
24
 
 
 
 
 
 
 
 
25
  def summarize_text(text):
26
+ prompt = f"Resuma em português ({TARGET_LENGTH} caracteres): {text}"
27
+ inputs = tokenizer(prompt, return_tensors="pt", max_length=2048, truncation=True)
28
+
29
+ with torch.inference_mode():
30
+ outputs = model.generate(
31
+ **inputs,
32
+ max_new_tokens=MAX_LENGTH,
33
+ temperature=0.9,
34
+ top_k=50,
35
+ no_repeat_ngram_size=3
 
 
 
 
 
 
36
  )
37
+
38
+ summary = tokenizer.decode(outputs[0], skip_special_tokens=True)
39
+ return summary[len(prompt):].strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
  # Interface Gradio
42
  interface = gr.Interface(