"""Gradio front-end for the LLM4Binary/sk2decompile-struct-6.7b decompiler."""

import re

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "LLM4Binary/sk2decompile-struct-6.7b"

# Successfully loaded (model, tokenizer) pair, kept for the process lifetime
# so the checkpoint is not reloaded on every request.
_LOADED_MODEL = None


# Loads the model and tokenizer - adapt according to the model's own documentation
def load_model():
    """Return the ``(model, tokenizer)`` pair, loading it on first use.

    The pair is cached after the first successful load. Failures are not
    cached, so a later call will retry.

    Returns:
        ``(model, tokenizer)`` on success, ``(None, None)`` if loading failed
        (the error is printed).
    """
    global _LOADED_MODEL
    if _LOADED_MODEL is not None:
        return _LOADED_MODEL

    try:
        print(f"Carregando modelo {MODEL_ID}...")
        use_cuda = torch.cuda.is_available()
        tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID,
            torch_dtype=torch.float16 if use_cuda else torch.float32,
            device_map="auto" if use_cuda else None,
        )
        print("Modelo carregado com sucesso!")
    except Exception as e:
        # Top-level boundary: report and let the caller show an error message.
        print(f"Erro ao carregar modelo: {e}")
        return None, None

    _LOADED_MODEL = (model, tokenizer)
    return _LOADED_MODEL


# Decompilation function - needs adapting to the model's actual prompt format
def decompile_binary(assembly_code, max_length=512, temperature=0.7):
    """Generate C code for *assembly_code* using the decompilation model.

    Args:
        assembly_code: x86 assembly text to decompile.
        max_length: Maximum number of new tokens to generate.
        temperature: Sampling temperature passed to ``generate``.

    Returns:
        The generated text, or a human-readable error message if the input is
        empty, the model could not be loaded, or generation failed.
    """
    if not assembly_code or not assembly_code.strip():
        # Nothing to decompile; avoid running the model on an empty prompt.
        return "Nenhum código assembly fornecido."

    model, tokenizer = load_model()
    if model is None or tokenizer is None:
        return "Erro ao carregar o modelo. Verifique se o nome do modelo está correto."

    try:
        # Prompt format is a placeholder; adapt it to the model's documentation.
        prompt = f"Decompile the following x86 assembly to C code:\n\n{assembly_code}\n\nC code:"

        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024)
        # Follow the model's actual placement instead of assuming .cuda().
        inputs = {k: v.to(model.device) for k, v in inputs.items()}
        prompt_len = inputs["input_ids"].shape[1]

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                # Slider values may arrive as floats; generate() needs an int here.
                max_new_tokens=int(max_length),
                temperature=float(temperature),
                do_sample=True,
                top_p=0.95,
                pad_token_id=tokenizer.eos_token_id,
            )

        # Decode only the newly generated tokens. Splitting the full decoded
        # text on "C code:" breaks when the assembly or the model output
        # contains that marker, or when truncation cut it from the prompt.
        new_tokens = outputs[0][prompt_len:]
        return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

    except Exception as e:
        return f"Erro durante a decompilação: {str(e)}"


# Gradio interface
with gr.Blocks(title="Binary Decompiler") as demo:
    gr.Markdown("# 🧠 SK²Decompile - Binary to C Code Decompiler")
    gr.Markdown("Decompile x86 assembly code to readable C code using LLM4Binary/sk2decompile-struct-6.7b")

    with gr.Row():
        with gr.Column():
            assembly_input = gr.Textbox(
                label="Assembly Code (x86)",
                placeholder="Digite ou cole seu código assembly aqui...",
                lines=15,
                max_lines=30,
            )

            with gr.Row():
                max_length = gr.Slider(128, 1024, value=512, step=1, label="Max Length")
                temperature = gr.Slider(0.1, 1.0, value=0.7, step=0.1, label="Temperature")

            decompile_btn = gr.Button("Decompile", variant="primary")

        with gr.Column():
            c_output = gr.Code(
                label="Decompiled C Code",
                language="c",
                lines=20,
            )

            gr.Markdown("### Exemplo de uso:")
            gr.Markdown("```\nmov eax, 5\nadd eax, ebx\nret\n```")

    # Predefined examples
    examples = [
        ["mov eax, 5\nadd eax, ebx\nret"],
        ["push ebp\nmov ebp, esp\nmov eax, [ebp+8]\nadd eax, [ebp+12]\npop ebp\nret"],
    ]

    gr.Examples(
        examples=examples,
        inputs=assembly_input,
        outputs=c_output,
        fn=decompile_binary,
        cache_examples=True,
    )

    decompile_btn.click(
        fn=decompile_binary,
        inputs=[assembly_input, max_length, temperature],
        outputs=c_output,
    )

    gr.Markdown("---")
    gr.Markdown("🔗 **Model**: [LLM4Binary/sk2decompile-struct-6.7b](https://huggingface.co/LLM4Binary/sk2decompile-struct-6.7b)")
    gr.Markdown("💡 **Note**: Este é um modelo de 6.7B parâmetros especializado em decompilação binária. Para melhores resultados, forneça funções assembly completas.")

if __name__ == "__main__":
    demo.launch()