File size: 4,349 Bytes
b12c25f
2d5e2c7
 
 
b12c25f
2d5e2c7
 
2048899
2d5e2c7
 
 
 
 
 
 
 
 
2048899
2d5e2c7
 
2048899
2d5e2c7
 
 
 
 
 
 
2048899
2d5e2c7
 
 
6dde8db
2d5e2c7
 
 
6dde8db
2d5e2c7
 
 
 
 
 
 
 
 
6dde8db
2d5e2c7
6dde8db
2d5e2c7
 
 
6dde8db
2d5e2c7
6dde8db
2d5e2c7
 
b12c25f
2d5e2c7
 
 
 
6dde8db
2d5e2c7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6dde8db
2d5e2c7
 
6dde8db
2d5e2c7
 
 
 
 
6dde8db
2d5e2c7
 
 
 
 
 
 
6dde8db
2d5e2c7
 
 
 
6dde8db
 
2d5e2c7
 
 
b12c25f
 
2d5e2c7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import re

# Load the model and tokenizer - adapt according to the model's own documentation.
# Holds the successfully loaded (model, tokenizer) pair so the checkpoint is
# loaded once per process instead of once per call.
_MODEL_CACHE = {}


def load_model():
    """Return the ``(model, tokenizer)`` pair for the decompiler checkpoint.

    The first successful call loads ``LLM4Binary/sk2decompile-struct-6.7b``
    and stores the pair in a module-level cache; later calls return the
    cached objects without reloading. The model is loaded in float16 with
    ``device_map="auto"`` when CUDA is available, otherwise in float32 with
    no device map.

    Returns:
        ``(model, tokenizer)`` on success, or ``(None, None)`` if loading
        raised. The error is printed to stdout. Failures are not cached, so
        a later call tries to load again.
    """
    cached_pair = _MODEL_CACHE.get("pair")
    if cached_pair is not None:
        return cached_pair

    try:
        print("Carregando modelo LLM4Binary/sk2decompile-struct-6.7b...")
        use_cuda = torch.cuda.is_available()
        tokenizer = AutoTokenizer.from_pretrained("LLM4Binary/sk2decompile-struct-6.7b")
        model = AutoModelForCausalLM.from_pretrained(
            "LLM4Binary/sk2decompile-struct-6.7b",
            torch_dtype=torch.float16 if use_cuda else torch.float32,
            device_map="auto" if use_cuda else None,
        )
    except Exception as e:
        # Callers treat (None, None) as "model unavailable" and show a message.
        print(f"Erro ao carregar modelo: {e}")
        return None, None

    print("Modelo carregado com sucesso!")
    _MODEL_CACHE["pair"] = (model, tokenizer)
    return model, tokenizer

# Decompilation function - must be adapted to the model's actual API.
def decompile_binary(assembly_code, max_length=512, temperature=0.7):
    """Decompile an x86 assembly listing into C code with the language model.

    Args:
        assembly_code: Assembly text to decompile.
        max_length: Maximum number of new tokens to generate; coerced to
            ``int`` before being passed to ``model.generate``.
        temperature: Sampling temperature; coerced to ``float``.

    Returns:
        The generated text (prompt removed, surrounding whitespace stripped),
        or a user-facing error message when the input is empty, the model
        could not be loaded, or generation raised an exception.
    """
    # Reject empty input before paying for a model load and a generation pass.
    if not assembly_code or not assembly_code.strip():
        return "Entrada vazia: informe o assembly para decompilar."

    model, tokenizer = load_model()

    if model is None or tokenizer is None:
        return "Erro ao carregar o modelo. Verifique se o nome do modelo está correto."

    try:
        # Prompt format expected by the decompilation model.
        # NOTE(review): this format should be confirmed against the model's documentation.
        prompt = f"Decompile the following x86 assembly to C code:\n\n{assembly_code}\n\nC code:"

        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024)
        if torch.cuda.is_available():
            inputs = {k: v.cuda() for k, v in inputs.items()}

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                # UI sliders may deliver numbers as floats; generate() needs an int here.
                max_new_tokens=int(max_length),
                temperature=float(temperature),
                do_sample=True,
                top_p=0.95,
                pad_token_id=tokenizer.eos_token_id,
            )

        # The returned sequence starts with the prompt tokens. Slice them off by
        # length instead of splitting the decoded text on "C code:", which picks
        # the wrong piece when the assembly or the generated code contains that marker.
        prompt_token_count = inputs["input_ids"].shape[-1]
        generated_tokens = outputs[0][prompt_token_count:]
        return tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

    except Exception as e:
        # UI boundary: report the failure as text instead of crashing the handler.
        return f"Erro durante a decompilação: {str(e)}"

# Gradio interface: a two-column layout with the assembly input and generation
# controls on the left and the decompiled C output on the right.
with gr.Blocks(title="Binary Decompiler") as demo:
    # Page header.
    gr.Markdown("# 🧠 SK²Decompile - Binary to C Code Decompiler")
    gr.Markdown("Decompile x86 assembly code to readable C code using LLM4Binary/sk2decompile-struct-6.7b")
    
    with gr.Row():
        # Left column: input text box, generation sliders and the submit button.
        with gr.Column():
            assembly_input = gr.Textbox(
                label="Assembly Code (x86)",
                placeholder="Digite ou cole seu código assembly aqui...",
                lines=15,
                max_lines=30
            )
            with gr.Row():
                # These two values are forwarded to decompile_binary as
                # max_length and temperature by the click handler below.
                max_length = gr.Slider(128, 1024, value=512, step=1, label="Max Length")
                temperature = gr.Slider(0.1, 1.0, value=0.7, step=0.1, label="Temperature")
            
            decompile_btn = gr.Button("Decompile", variant="primary")
        
        # Right column: code viewer for the generated C code.
        with gr.Column():
            c_output = gr.Code(
                label="Decompiled C Code",
                language="c",
                lines=20
            )
    
    # Static usage example shown as a fenced code block.
    gr.Markdown("### Exemplo de uso:")
    gr.Markdown("```\nmov eax, 5\nadd eax, ebx\nret\n```")
    
    # Predefined examples; each row supplies a value for assembly_input only.
    examples = [
        ["mov eax, 5\nadd eax, ebx\nret"],
        ["push ebp\nmov ebp, esp\nmov eax, [ebp+8]\nadd eax, [ebp+12]\npop ebp\nret"]
    ]
    
    # Only assembly_input is wired as an input here, so decompile_binary runs
    # with its default max_length and temperature for the examples.
    # NOTE(review): cache_examples=True presumably makes Gradio run fn on every
    # example ahead of time to cache the outputs - confirm that cost is
    # acceptable for a model of this size.
    gr.Examples(
        examples=examples,
        inputs=assembly_input,
        outputs=c_output,
        fn=decompile_binary,
        cache_examples=True
    )
    
    # Main action: run the decompiler with the text and both slider values.
    decompile_btn.click(
        fn=decompile_binary,
        inputs=[assembly_input, max_length, temperature],
        outputs=c_output
    )
    
    # Footer with a link to the model card and a usage note.
    gr.Markdown("---")
    gr.Markdown("🔗 **Model**: [LLM4Binary/sk2decompile-struct-6.7b](https://huggingface.co/LLM4Binary/sk2decompile-struct-6.7b)")
    gr.Markdown("💡 **Note**: Este é um modelo de 6.7B parâmetros especializado em decompilação binária. Para melhores resultados, forneça funções assembly completas.")

# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()