rafavidal1709 committed on
Commit
a3b686b
·
verified ·
1 Parent(s): b9de2e2

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +81 -0
app.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import gradio as gr
3
+ from transformers import AutoTokenizer, AutoModelForCausalLM
4
+
5
# DeepSeek-R1 model settings.
# TODO: verify the exact repository name on the Hugging Face Hub.
MODEL_NAME = "deepseek-ai/deepseek-R1"

# Load the tokenizer first, then the causal-LM model, from the same checkpoint.
tokenizer, model = (
    loader.from_pretrained(MODEL_NAME)
    for loader in (AutoTokenizer, AutoModelForCausalLM)
)
11
+
12
# Length settings: the accepted window is TARGET_LENGTH +/- MARGIN.
TARGET_LENGTH = 256
MARGIN = 6
MIN_LENGTH, MAX_LENGTH = TARGET_LENGTH - MARGIN, TARGET_LENGTH + MARGIN

# Upper bound on generation retries.
MAX_ATTEMPTS = 5
18
+
19
def summarize_text(text):
    """Summarize *text* in Portuguese, aiming for about TARGET_LENGTH characters.

    The model is queried up to MAX_ATTEMPTS times. The first summary whose
    character count falls within [MIN_LENGTH, MAX_LENGTH] is returned
    immediately. Otherwise the token budget is rescaled toward the target
    length and generation is retried; if no attempt lands inside the window,
    the closest summary seen is returned, cut to MAX_LENGTH characters.

    Args:
        text: The source text to summarize.

    Returns:
        The summary string (never longer than MAX_LENGTH characters), or an
        empty string when *text* is empty or contains only whitespace.
    """
    # Nothing to summarize: skip the (expensive) generation entirely.
    if not text or not text.strip():
        return ""

    min_new_tokens = 32
    max_new_tokens = 512

    # The prompt and its encoding are identical for every attempt, so build
    # them once instead of on each loop iteration.
    prompt = f"Resuma o seguinte texto em português com cerca de {TARGET_LENGTH} caracteres:\n{text}\nResumo:"
    inputs = tokenizer.encode(
        prompt,
        return_tensors="pt",
        max_length=4096,  # Adjust to the model's context capacity
        truncation=True,
    ).to(model.device)
    prompt_token_count = inputs.shape[-1]

    best_summary = ""
    best_distance = float("inf")
    adjusted_max_tokens = max_new_tokens

    for _ in range(MAX_ATTEMPTS):
        # Inference only: no gradients are needed.
        with torch.no_grad():
            summary_ids = model.generate(
                inputs,
                max_new_tokens=adjusted_max_tokens,
                num_beams=5,
                repetition_penalty=1.2,
                early_stopping=True,
                # temperature/top_p only take effect when sampling is on;
                # sampling also makes retries produce different candidates.
                do_sample=True,
                temperature=0.7,
                top_p=0.9,
            )

        # A causal LM echoes the prompt before the continuation; drop the
        # prompt tokens instead of searching the decoded text for "Resumo:",
        # which breaks if truncation cut the marker or the model repeats it.
        generated_ids = summary_ids[0][prompt_token_count:]
        summary = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

        summary_length = len(summary)
        if MIN_LENGTH <= summary_length <= MAX_LENGTH:
            return summary

        distance = abs(TARGET_LENGTH - summary_length)
        if distance < best_distance:
            best_summary = summary
            best_distance = distance

        # Rescale the token budget by target/actual length: shrink it after a
        # summary that was too long, grow it back after one that was too
        # short, and keep it inside [min_new_tokens, max_new_tokens].
        scale = TARGET_LENGTH / max(summary_length, 1)
        adjusted_max_tokens = int(adjusted_max_tokens * scale)
        adjusted_max_tokens = max(min_new_tokens, min(max_new_tokens, adjusted_max_tokens))

    return best_summary[:MAX_LENGTH]
70
+
71
# Gradio interface. The character range shown to the user is derived from
# MIN_LENGTH / MAX_LENGTH so the description cannot drift from the limits
# that summarize_text actually enforces.
interface = gr.Interface(
    fn=summarize_text,
    inputs=gr.Textbox(label="Texto", lines=10, placeholder="Digite seu texto aqui..."),
    outputs=gr.Textbox(label="Resumo"),
    title="Resumidor com DeepSeek-R1",
    description=(
        "Resumos automáticos em português com ajuste de tamanho "
        f"({MIN_LENGTH}-{MAX_LENGTH} caracteres)"
    ),
)

if __name__ == "__main__":
    # share=True asks Gradio to also create a public share link.
    interface.launch(share=True)