Update app.py
app.py CHANGED
@@ -1,28 +1,146 @@
+# app.py - robust version without LoginButton (uses the HF_TOKEN secret)
 import os
 import gradio as gr
 from huggingface_hub import InferenceClient
 
-MODEL_ID = "
+MODEL_ID = "LLM4Binary/sk2decompile-struct-6.7b"  # use the hosted endpoint (Inference API)
 
-def get_client(
-    …
+def get_client(hf_token_obj):
+    # hf_token_obj comes from Gradio only when OAuth is enabled; otherwise use the HF_TOKEN secret
+    token = None
+    try:
+        token = getattr(hf_token_obj, "token", None)
+    except Exception:
+        token = None
+    if not token:
+        token = os.getenv("HF_TOKEN")
     return InferenceClient(model=MODEL_ID, token=token)
 
-def respond(message, history,
-    … (old lines 12-22 are truncated in the diff view)
+def respond(message, history, *args):
+    """
+    Robust way to receive the arguments coming from ChatInterface.
+    args (in additional_inputs order):
+        0 -> system_message (Textbox)
+        1 -> max_tokens (Slider)
+        2 -> temperature (Slider)
+        3 -> top_p (Slider)
+        4 -> hf_token (when Login/OAuth exists) -- normally None here because LoginButton was removed
+    """
+    # defaults
+    system_message = ""
+    try:
+        if len(args) >= 1 and args[0] is not None:
+            system_message = args[0]
+    except Exception:
+        system_message = ""
+
+    try:
+        max_tokens = int(args[1]) if len(args) >= 2 and args[1] is not None else 512
+    except Exception:
+        max_tokens = 512
+
+    try:
+        temperature = float(args[2]) if len(args) >= 3 and args[2] is not None else 0.7
+    except Exception:
+        temperature = 0.7
+
+    try:
+        top_p = float(args[3]) if len(args) >= 4 and args[3] is not None else 0.95
+    except Exception:
+        top_p = 0.95
+
+    hf_token_obj = args[4] if len(args) >= 5 else None
+
+    try:
+        client = get_client(hf_token_obj)
+    except Exception as e:
+        yield f"❌ Error creating client: {e}"
+        return
+
+    msgs = []
+    if system_message:
+        msgs.append({"role": "system", "content": system_message})
+    # history is usually a list of (user, assistant) pairs; ChatInterface may send it in different formats
+    if history:
+        # if history already arrives in role/content format, append it directly
+        for item in history:
+            if isinstance(item, dict) and "role" in item and "content" in item:
+                msgs.append(item)
+            elif isinstance(item, (list, tuple)) and len(item) == 2:
+                # item = (user_text, assistant_text); append as two messages
+                msgs.append({"role": "user", "content": item[0]})
+                msgs.append({"role": "assistant", "content": item[1]})
+    msgs.append({"role": "user", "content": message})
+
+    response = ""
+    try:
+        stream = client.chat_completion(
+            messages=msgs,
+            max_tokens=max_tokens,
+            stream=True,
+            temperature=temperature,
+            top_p=top_p,
+        )
+    except Exception as e:
+        yield f"❌ Error calling chat_completion: {e}"
+        return
+
+    # simple, tolerant token extractor
+    def _get_chunk_text(chunk):
+        try:
+            if hasattr(chunk, "choices"):
+                c = chunk.choices
+                if c and len(c) > 0:
+                    delta = getattr(c[0], "delta", None)
+                    if delta:
+                        return getattr(delta, "content", "") or ""
+            if isinstance(chunk, dict):
+                if "generated_text" in chunk and chunk["generated_text"]:
+                    return chunk["generated_text"]
+                ch = chunk.get("choices", [])
+                if ch and isinstance(ch, list) and len(ch) > 0:
+                    first = ch[0]
+                    if isinstance(first, dict):
+                        d = first.get("delta")
+                        if isinstance(d, dict):
+                            return d.get("content", "") or ""
+                        msg = first.get("message")
+                        if isinstance(msg, dict):
+                            return msg.get("content", "") or ""
+            return ""
+        except Exception:
+            return ""
+
+    try:
+        for chunk in stream:
+            token = _get_chunk_text(chunk)
+            if token:
+                response += token
+                yield response
+        # if nothing came through the stream, try a synchronous call as a fallback
+        if response == "":
+            try:
+                final = client.chat_completion(messages=msgs, max_tokens=max_tokens, stream=False,
+                                               temperature=temperature, top_p=top_p)
+                # try to extract the text from final
+                if isinstance(final, dict) and "generated_text" in final:
+                    response = final["generated_text"]
+                elif hasattr(final, "choices") and final.choices:
+                    # try to access message/content
+                    try:
+                        response = final.choices[0].message.content
+                    except Exception:
+                        # fallback
+                        pass
+                if response:
+                    yield response
+            except Exception:
+                pass
     except Exception as e:
-        yield f"❌ Error: {e}"
+        yield f"❌ Error during streaming: {e}"
+        return
 
+# UI: note that gr.LoginButton() was REMOVED to avoid requiring OAuth config
 chatbot = gr.ChatInterface(
     respond,
     type="messages",
@@ -31,14 +149,12 @@ chatbot = gr.ChatInterface(
         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
         gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
-        gr.LoginButton(),
     ],
 )
 
-
-with demo:
+with gr.Blocks() as demo:
     with gr.Sidebar():
-        gr.Markdown("
+        gr.Markdown("Set HF_TOKEN in Settings → Secrets (optional).")
     chatbot.render()
 
 if __name__ == "__main__":
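
The history branch in the new respond() accepts both Gradio's role/content message dicts and legacy (user, assistant) tuples, mapping both to the same message list. A minimal sketch of that mapping; the normalize() helper name is hypothetical, since respond() inlines this logic:

# Sketch of the history normalization that respond() performs inline.
# normalize() is a hypothetical name introduced only for illustration.
def normalize(history):
    msgs = []
    for item in history:
        if isinstance(item, dict) and "role" in item and "content" in item:
            msgs.append(item)  # already in role/content format
        elif isinstance(item, (list, tuple)) and len(item) == 2:
            msgs.append({"role": "user", "content": item[0]})
            msgs.append({"role": "assistant", "content": item[1]})
    return msgs

tuple_history = [("What is 2+2?", "4")]
dict_history = [{"role": "user", "content": "What is 2+2?"},
                {"role": "assistant", "content": "4"}]
assert normalize(tuple_history) == normalize(dict_history)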
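Because respond() now takes its extra inputs positionally via *args (in additional_inputs order), the generator can also be driven outside the Gradio UI for a quick smoke test. A sketch, assuming the new file is saved as app.py in the working directory and HF_TOKEN is exported in the environment:

# Sketch: drive the respond() generator directly, without the Gradio UI.
# Assumes app.py is importable and HF_TOKEN is set (assumptions, not part of the diff).
from app import respond

history = [("hi", "hello!")]  # tuple-pair history, one of the accepted formats
# positional args follow additional_inputs order: system_message, max_tokens, temperature, top_p
for partial in respond("Summarize what top-p sampling does.", history,
                       "You are a helpful assistant.", 64, 0.7, 0.95):
    pass  # each yield is the accumulated response so far
print(partial)  # the last yielded value is the full reply (or an ❌ error string)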