Spaces:

rafavidal1709
/

Summarization-Deep-Seek-R1

Runtime error

App Files Files Community

rafavidal1709 committed on Feb 18, 2025

Commit

a3b686b

verified ·

1 Parent(s): b9de2e2

Create app.py

Browse files

Files changed (1) hide show

app.py +81 -0

app.py ADDED Viewed

	@@ -0,0 +1,81 @@

+import torch
+import gradio as gr
+from transformers import AutoTokenizer, AutoModelForCausalLM
+# Configurações do modelo DeepSeek-R1
+MODEL_NAME = "deepseek-ai/deepseek-R1"  # Verificar nome exato no Hugging Face Hub
+# Carregar tokenizer e modelo
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
+# Configurações de comprimento
+TARGET_LENGTH = 256
+MARGIN = 6
+MIN_LENGTH = TARGET_LENGTH - MARGIN
+MAX_LENGTH = TARGET_LENGTH + MARGIN
+MAX_ATTEMPTS = 5
+def summarize_text(text):
+    """
+    Gera resumo adaptado para o DeepSeek-R1 com ajuste de comprimento
+    """
+    best_summary = ""
+    best_distance = float("inf")
+    adjusted_max_tokens = 512  # Valor inicial ajustável
+    for attempt in range(MAX_ATTEMPTS):
+        # Formatar prompt para sumarização
+        prompt = f"Resuma o seguinte texto em português com cerca de {TARGET_LENGTH} caracteres:\n{text}\nResumo:"
+        inputs = tokenizer.encode(
+            prompt,
+            return_tensors="pt",
+            max_length=4096,  # Ajustar conforme capacidade do modelo
+            truncation=True
+        )
+        # Gerar sumário
+        summary_ids = model.generate(
+            inputs,
+            max_new_tokens=adjusted_max_tokens,
+            num_beams=5,
+            repetition_penalty=1.2,
+            early_stopping=True,
+            temperature=0.7,
+            top_p=0.9
+        )
+        summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
+        # Filtrar apenas o resumo gerado (remover prompt)
+        if "Resumo:" in summary:
+            summary = summary.split("Resumo:")[-1].strip()
+        summary_length = len(summary)
+        distance = abs(TARGET_LENGTH - summary_length)
+        if distance < best_distance:
+            best_summary = summary
+            best_distance = distance
+        if MIN_LENGTH <= summary_length <= MAX_LENGTH:
+            return summary[:MAX_LENGTH]  # Garantir limite máximo
+        # Ajuste adaptativo
+        adjustment = int((summary_length / TARGET_LENGTH) * adjusted_max_tokens)
+        adjusted_max_tokens = max(32, adjusted_max_tokens - adjustment)
+    return best_summary[:MAX_LENGTH]
+# Interface Gradio
+interface = gr.Interface(
+    fn=summarize_text,
+    inputs=gr.Textbox(label="Texto", lines=10, placeholder="Digite seu texto aqui..."),
+    outputs=gr.Textbox(label="Resumo"),
+    title="Resumidor com DeepSeek-R1",
+    description="Resumos automáticos em português com ajuste de tamanho (250-262 caracteres)",
+)
+if __name__ == "__main__":
+    interface.launch(share=True)