Spaces:

TaylorKaua
/

Sk2decompile2

Sleeping

App Files Files Community

TaylorKaua committed on Nov 7, 2025

Commit

6dde8db

verified ·

1 Parent(s): 2048899

Update app.py

Browse files

Files changed (1) hide show

app.py +304 -143

app.py CHANGED Viewed

@@ -1,161 +1,322 @@
-# app.py — versão robusta sem LoginButton (usa HF_TOKEN secret)
 import os
 import gradio as gr
-from huggingface_hub import InferenceClient
-MODEL_ID = "LLM4Binary/sk2decompile-struct-6.7b"  # use endpoint hospedado (Inference API)
-def get_client(hf_token_obj):
-    # hf_token_obj vem do Gradio apenas quando há OAuth; senão usamos HF_TOKEN secret
-    token = None
-    try:
-        token = getattr(hf_token_obj, "token", None)
-    except Exception:
-        token = None
-    if not token:
-        token = os.getenv("HF_TOKEN")
-    return InferenceClient(model=MODEL_ID, token=token)
-def respond(message, history, *args):
-    """
-    Maneira robusta de receber argumentos vindos do ChatInterface.
-    args (na ordem do additional_inputs):
-      0 -> system_message (Textbox)
-      1 -> max_tokens (Slider)
-      2 -> temperature (Slider)
-      3 -> top_p (Slider)
-      4 -> hf_token (quando existe Login/OAuth) -- aqui normalmente None porque removemos LoginButton
-    """
-    # defaults
-    system_message = ""
-    try:
-        if len(args) >= 1 and args[0] is not None:
-            system_message = args[0]
-    except Exception:
-        system_message = ""
-    try:
-        max_tokens = int(args[1]) if len(args) >= 2 and args[1] is not None else 512
-    except Exception:
-        max_tokens = 512
-    try:
-        temperature = float(args[2]) if len(args) >= 3 and args[2] is not None else 0.7
-    except Exception:
-        temperature = 0.7
-    try:
-        top_p = float(args[3]) if len(args) >= 4 and args[3] is not None else 0.95
-    except Exception:
-        top_p = 0.95
-    hf_token_obj = args[4] if len(args) >= 5 else None
     try:
-        client = get_client(hf_token_obj)
     except Exception as e:
-        yield f"❌ Erro ao criar client: {e}"
-        return
-    msgs = []
-    if system_message:
-        msgs.append({"role": "system", "content": system_message})
-    # history normalmente é lista de pares (user, assistant) — ChatInterface pode enviar de formas diferentes
-    if history:
-        # se history já vier no formato role/content, adiciona diretamente
-        for item in history:
-            if isinstance(item, dict) and "role" in item and "content" in item:
-                msgs.append(item)
-            elif isinstance(item, (list, tuple)) and len(item) == 2:
-                # item = (user_text, assistant_text) — adiciona como duas mensagens
-                msgs.append({"role": "user", "content": item[0]})
-                msgs.append({"role": "assistant", "content": item[1]})
-    msgs.append({"role": "user", "content": message})
-    response = ""
     try:
-        stream = client.chat_completion(
-            messages=msgs,
-            max_tokens=max_tokens,
-            stream=True,
-            temperature=temperature,
-            top_p=top_p,
         )
     except Exception as e:
-        yield f"❌ Erro ao chamar chat_completion: {e}"
-        return
-    # extrator simples e tolerante de tokens
-    def _get_chunk_text(chunk):
         try:
-            if hasattr(chunk, "choices"):
-                c = chunk.choices
-                if c and len(c) > 0:
-                    delta = getattr(c[0], "delta", None)
-                    if delta:
-                        return getattr(delta, "content", "") or ""
-            if isinstance(chunk, dict):
-                if "generated_text" in chunk and chunk["generated_text"]:
-                    return chunk["generated_text"]
-                ch = chunk.get("choices", [])
-                if ch and isinstance(ch, list) and len(ch) > 0:
-                    first = ch[0]
-                    if isinstance(first, dict):
-                        d = first.get("delta")
-                        if isinstance(d, dict):
-                            return d.get("content", "") or ""
-                        msg = first.get("message")
-                        if isinstance(msg, dict):
-                            return msg.get("content", "") or ""
-            return ""
-        except Exception:
-            return ""
-    try:
-        for chunk in stream:
-            token = _get_chunk_text(chunk)
-            if token:
-                response += token
-                yield response
-        # se nada veio pelo stream, tentar chamada síncrona como fallback
-        if response == "":
             try:
-                final = client.chat_completion(messages=msgs, max_tokens=max_tokens, stream=False,
-                                               temperature=temperature, top_p=top_p)
-                # tentar extrair texto do final
-                if isinstance(final, dict) and "generated_text" in final:
-                    response = final["generated_text"]
-                elif hasattr(final, "choices") and final.choices:
-                    # tentar acessar message/content
-                    try:
-                        response = final.choices[0].message.content
-                    except Exception:
-                        # fallback
-                        pass
-                if response:
-                    yield response
-            except Exception:
-                pass
-    except Exception as e:
-        yield f"❌ Erro durante streaming: {e}"
-        return
-# UI — note que REMOVI o gr.LoginButton() para evitar exigir OAUTH config
-chatbot = gr.ChatInterface(
-    respond,
-    type="messages",
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly assistant.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
-    ],
-)
-with gr.Blocks() as demo:
-    with gr.Sidebar():
-        gr.Markdown("Configure HF_TOKEN em Settings → Secrets (opcional).")
-    chatbot.render()
 if __name__ == "__main__":
-    demo.launch()

+# app.py - Versão ultra-robusta com múltiplos fallbacks
 import os
 import gradio as gr
+from huggingface_hub import InferenceClient, InferenceTimeoutError
+import time
+import logging
+from typing import Generator, Optional
+# Logging configuration for debugging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# Models in priority order (from most specific to most generic);
+# try_models() walks this list from first to last.
+FALLBACK_MODELS = [
+    "LLM4Binary/sk2decompile-struct-6.7b",  # Original model
+    "mradermacher/sk2decompile-struct-6.7b-GGUF",  # GGUF version
+    "meta-llama/Meta-Llama-3-8B-Instruct",  # Reliable generic fallback
+    "mistralai/Mistral-7B-Instruct-v0.2",   # Another fallback
+    "google/gemma-2b-it"                    # Lightweight fallback
+]
+def get_valid_token(hf_token_obj: Optional[object] = None) -> Optional[str]:
+    """Return a Hugging Face token from the first source that provides one.
+
+    Sources are tried in order:
+      1. the ``token`` attribute of ``hf_token_obj`` (e.g. an OAuth object),
+      2. the ``HF_TOKEN`` environment variable,
+      3. ``HF_TOKEN`` again after loading a local ``.env`` file, which is
+         only attempted when ``python-dotenv`` is installed.
+
+    Args:
+        hf_token_obj: Optional object exposing a ``token`` attribute.
+
+    Returns:
+        The token, or ``None`` when no source yields one or the lookup
+        fails unexpectedly.
+    """
     try:
+        # getattr with a default also covers None and objects without `token`.
+        oauth_token = getattr(hf_token_obj, "token", None)
+        if oauth_token:
+            logger.info("Usando token do OAuth")
+            return oauth_token
+        env_token = os.getenv("HF_TOKEN", "").strip()
+        if env_token:
+            logger.info("Usando token da variável de ambiente HF_TOKEN")
+            return env_token
+        # python-dotenv is optional: it is imported lazily and its absence
+        # only disables the .env lookup (logged instead of silently ignored).
+        try:
+            from dotenv import load_dotenv
+        except ImportError:
+            logger.debug("python-dotenv não instalado; arquivo .env ignorado")
+        else:
+            load_dotenv()
+            env_token = os.getenv("HF_TOKEN", "").strip()
+            if env_token:
+                logger.info("Usando token do arquivo .env")
+                return env_token
+        # Last resort: go unauthenticated (some public models allow it).
+        logger.warning("Nenhum token encontrado. Tentando sem autenticação...")
+        return None
     except Exception as e:
+        # Best-effort lookup: a failure here must not break the chat handler.
+        logger.error(f"Erro ao obter token: {e}")
+        return None
+def create_client(model_id: str, token: Optional[str] = None) -> Optional[InferenceClient]:
+    """Create an inference client bound to ``model_id``.
+
+    No network request is made here. The previous connectivity probe called
+    ``client.post(..., timeout=10)`` and read ``.status_code`` from the
+    result, but ``InferenceClient.post`` takes no ``timeout`` argument and
+    returns raw bytes, so the probe always raised and every model was
+    rejected. Reachability is now discovered by the first real request,
+    which the caller already wraps in its own error handling.
+
+    Args:
+        model_id: Hub model identifier to bind the client to.
+        token: Optional access token; ``None`` means unauthenticated.
+
+    Returns:
+        The configured client, or ``None`` if it could not be constructed.
+    """
     try:
+        # 60-second request timeout, set once on the client so it applies
+        # to every call made through it.
+        timeout = 60
+        logger.info(f"Criando cliente para modelo: {model_id}")
+        return InferenceClient(
+            model=model_id,
+            token=token,
+            timeout=timeout
+        )
     except Exception as e:
+        logger.error(f"Erro ao criar cliente para {model_id}: {e}")
+        return None
+def try_models(messages, max_tokens, temperature, top_p, token):
+    """Yield a chat completion from the first fallback model that answers.
+
+    Generator. For each entry of ``FALLBACK_MODELS`` it first tries a
+    streaming completion, yielding the accumulated text after every
+    non-empty chunk, and then a non-streaming completion if the stream
+    failed or produced no text. It stops at the first model that returns a
+    non-blank answer. If none does, a single user-facing error message is
+    yielded instead.
+
+    ``chat_completion`` has no ``timeout`` argument; the request timeout is
+    the one configured on the client, so none is passed per call.
+
+    Args:
+        messages: Chat messages as ``{"role": ..., "content": ...}`` dicts.
+        max_tokens: Maximum number of tokens to generate.
+        temperature: Sampling temperature.
+        top_p: Nucleus-sampling probability mass.
+        token: Access token forwarded to ``create_client`` (may be ``None``).
+
+    Yields:
+        The response text generated so far, or the final error message.
+    """
+    for model_id in FALLBACK_MODELS:
         try:
+            logger.info(f"Tentando modelo: {model_id}")
+            client = create_client(model_id, token)
+            if not client:
+                logger.warning(f"Cliente inválido para {model_id}, pulando...")
+                continue
+            # Try streaming first
+            response = ""
+            stream_completed = False
             try:
+                stream = client.chat_completion(
+                    messages=messages,
+                    max_tokens=max_tokens,
+                    stream=True,
+                    temperature=temperature,
+                    top_p=top_p,
+                )
+                for chunk in stream:
+                    if hasattr(chunk, "choices") and chunk.choices:
+                        choice = chunk.choices[0]
+                        if hasattr(choice, "delta") and hasattr(choice.delta, "content"):
+                            token_content = choice.delta.content or ""
+                            if token_content:
+                                response += token_content
+                                yield response
+                stream_completed = True
+            except Exception as stream_error:
+                logger.warning(f"Erro no streaming com {model_id}: {stream_error}")
+            if stream_completed and response.strip():
+                logger.info(f"Resposta obtida com sucesso de: {model_id} (streaming)")
+                return
+            # Synchronous fallback: used when the stream raised (even after
+            # partial output) or finished without producing any text.
+            logger.info(f"Tentando chamada síncrona com {model_id}")
+            response = ""
+            try:
+                full_response = client.chat_completion(
+                    messages=messages,
+                    max_tokens=max_tokens,
+                    stream=False,
+                    temperature=temperature,
+                    top_p=top_p,
+                )
+                # Extract the answer from any of the supported result shapes
+                if hasattr(full_response, "choices") and full_response.choices:
+                    choice = full_response.choices[0]
+                    if hasattr(choice, "message") and hasattr(choice.message, "content"):
+                        response = choice.message.content
+                    elif hasattr(choice, "text"):
+                        response = choice.text
+                elif isinstance(full_response, dict):
+                    if "generated_text" in full_response:
+                        response = full_response["generated_text"]
+                    elif "choices" in full_response and full_response["choices"]:
+                        first_choice = full_response["choices"][0]
+                        if "message" in first_choice and "content" in first_choice["message"]:
+                            response = first_choice["message"]["content"]
+                if response and response.strip():
+                    logger.info(f"Resposta obtida com sucesso de: {model_id} (síncrono)")
+                    yield response
+                    return
+            except Exception as sync_error:
+                logger.warning(f"Erro na chamada síncrona com {model_id}: {sync_error}")
+            # Short pause between attempts to avoid hammering the API
+            time.sleep(1)
+        except Exception as model_error:
+            logger.error(f"Erro geral com modelo {model_id}: {model_error}")
+            continue
+    # Every model failed; report all of the models that were tried.
+    error_msg = (
+        "❌ **Erro persistente**: Nenhum modelo disponível no momento.\n\n"
+        "💡 **Soluções sugeridas**:\n"
+        "1. Verifique sua conexão com a internet\n"
+        "2. Configure um token HF válido em Settings → Secrets\n"
+        "3. Tente novamente em alguns minutos\n\n"
+        f"📋 Últimos modelos tentados: {', '.join(FALLBACK_MODELS)}"
+    )
+    logger.error("Todos os modelos falharam")
+    yield error_msg
+def respond(message: str, history: list, system_message: str, max_tokens: int, temperature: float, top_p: float, hf_token_obj=None) -> Generator[str, None, None]:
+    """Chat handler: build the message list and stream the model's answer.
+
+    Args:
+        message: Current user message; blank input is rejected.
+        history: Earlier turns, either ``{"role", "content"}`` dicts or
+            ``(user, assistant)`` pairs.
+        system_message: System prompt; a default is used when blank.
+        max_tokens: Maximum number of tokens to generate.
+        temperature: Sampling temperature.
+        top_p: Nucleus-sampling probability mass.
+        hf_token_obj: Optional object exposing a ``token`` attribute.
+
+    Yields:
+        Status or warning text, then the progressively growing response (or
+        an error message) produced by ``try_models``.
+    """
+    # Input validation
+    if not message or not message.strip():
+        yield "❌ **Mensagem vazia**: Por favor, digite uma mensagem válida."
+        return
+    # The slider value may arrive as a float; the API expects an integer.
+    try:
+        max_tokens = int(max_tokens)
+    except (TypeError, ValueError):
+        max_tokens = 1024
+    token = get_valid_token(hf_token_obj)
+    if not token:
+        warning_msg = (
+            "⚠️ **Sem autenticação**: Operando em modo limitado.\n"
+            "Para melhor performance e acesso a mais modelos:\n"
+            "1. Configure HF_TOKEN em Settings → Secrets\n"
+            "2. Ou faça login na interface\n\n"
+            "Tentando com modelos públicos..."
+        )
+        yield warning_msg
+        # No return here: keep going and try the public models
+    messages = []
+    if system_message and system_message.strip():
+        messages.append({"role": "system", "content": system_message.strip()})
+    else:
+        # Safe default system message
+        messages.append({"role": "system", "content": "Você é um assistente útil e especializado em análise de estruturas de código."})
+    for entry in history or []:
+        try:
+            if isinstance(entry, dict) and "role" in entry and "content" in entry:
+                # Forward only role/content: history dicts may carry extra
+                # UI-only keys that a chat-completion endpoint can reject.
+                messages.append({"role": entry["role"], "content": entry["content"]})
+            elif isinstance(entry, (list, tuple)) and len(entry) >= 2:
+                # Pair format: (user_msg, bot_response)
+                if entry[0]:
+                    messages.append({"role": "user", "content": str(entry[0])})
+                if entry[1]:
+                    messages.append({"role": "assistant", "content": str(entry[1])})
+        except Exception as e:
+            logger.warning(f"Erro ao processar histórico: {e}")
+            continue
+    messages.append({"role": "user", "content": message.strip()})
+    # Log only the count: the messages hold user content and the prompt.
+    logger.info(f"Mensagens preparadas: {len(messages)}")
+    try:
+        yield from try_models(messages, max_tokens, temperature, top_p, token)
+    except Exception as final_error:
+        logger.critical(f"Erro crítico inesperado: {final_error}")
+        yield (
+            "❌ **Erro crítico**: Ocorreu um problema inesperado.\n\n"
+            f"```python\n{str(final_error)}\n```\n\n"
+            "Por favor, recarregue a página e tente novamente."
+        )
+# User interface - simple and robust
+with gr.Blocks(title="Assistente de Análise de Código") as demo:
+    gr.Markdown("# 🔍 Assistente de Análise de Estruturas de Código")
+    gr.Markdown("### Versão robusta com múltiplos fallbacks e recuperação de erros")
+    chatbot = gr.ChatInterface(
+        respond,
+        type="messages",
+        # Listed in the order of respond()'s extra parameters:
+        # system_message, max_tokens, temperature, top_p.
+        additional_inputs=[
+            gr.Textbox(
+                value="Você é um especialista em análise de estruturas de código e decompilação. Forneça respostas técnicas detalhadas e precisas.",
+                label="System Message",
+                lines=3
+            ),
+            gr.Slider(
+                minimum=1, maximum=4096, value=1024, step=1,
+                label="Max Tokens (aumente para respostas mais longas)"
+            ),
+            gr.Slider(
+                minimum=0.0, maximum=2.0, value=0.3, step=0.1,
+                label="Temperature (0.0 = preciso, 2.0 = criativo)"
+            ),
+            gr.Slider(
+                minimum=0.1, maximum=1.0, value=0.9, step=0.05,
+                label="Top-p (0.1 = focado, 1.0 = diverso)"
+            ),
+        ],
+        # Each example supplies only the chat message.
+        examples=[
+            ["Analise esta função vulnerável: `def process_input(data): eval(data)`"],
+            ["Qual a estrutura de memória desta classe C++?"],
+            ["Explique o assembly x86 deste código binário"],
+            ["Como funciona o mecanismo de herança neste código?"],
+        ],
+        cache_examples=False,
+        analytics_enabled=False,
+    )
+    # Collapsed help panel. NOTE(review): the model list in this text is
+    # hard-coded and duplicates FALLBACK_MODELS; keep the two in sync.
+    with gr.Accordion("ℹ️ Informações e Solução de Problemas", open=False):
+        gr.Markdown("""
+        ### ✅ Este aplicativo é 100% robusto:
+        - **Múltiplos fallbacks**: Tenta até 5 modelos diferentes
+        - **Recuperação de erros**: Nunca falha completamente
+        - **Autenticação flexível**: Usa token do ambiente ou OAuth
+        - **Timeouts seguros**: Previne travamentos
+        - **Validação rigorosa**: Checa todas as entradas
+        ### 🛠️ Se ainda encontrar problemas:
+        1. **Configure HF_TOKEN**: Vá em Settings → Secrets e adicione seu token
+        2. **Recarregue a página**: Às vezes a conexão precisa ser renovada
+        3. **Simplifique sua query**: Modelos têm limites de contexto
+        4. **Verifique sua internet**: Necessária para chamadas à API
+        ### 📊 Modelos utilizados (em ordem de prioridade):
+        1. LLM4Binary/sk2decompile-struct-6.7b (especializado)
+        2. mradermacher/sk2decompile-struct-6.7b-GGUF (GGUF)
+        3. Meta-Llama-3-8B-Instruct (genérico confiável)
+        4. Mistral-7B-Instruct-v0.2 (alternativo)
+        5. Gemma-2b-it (fallback leve)
+        """)
 if __name__ == "__main__":
+    # Launch settings for a hosted deployment.
+    # - favicon_path is omitted: it expects a local image file, not a URL.
+    # - allowed_paths is omitted: allowing "." would let the server hand out
+    #   any file under the working directory (the app source, a local .env
+    #   holding HF_TOKEN), and this app serves no local files.
+    demo.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        share=False,
+        show_api=False,
+        auth=None,  # Do not force authentication
+        debug=False,  # Production
+    )