# Sk2decompile / app.py
# (Hugging Face Space file; last update by TaylorKaua, commit e602dc7, verified)
# app.py — corrected and more robust version
import os
import gradio as gr
from huggingface_hub import InferenceClient
# Hub model id used for every inference call below; adjust if necessary.
MODEL_ID = "mradermacher/sk2decompile-struct-6.7b-GGUF"
def make_client(hf_token):
    """Create an InferenceClient bound to MODEL_ID.

    The access token is resolved in this order:
      1) the token carried by the gr.LoginButton OAuth object (hf_token.token)
      2) the HF_TOKEN environment variable (useful as a Space secret)
      3) no token at all (anonymous access; may fail depending on the model)
    """
    resolved = None
    if hf_token:
        # hf_token may be None or an object exposing a .token attribute;
        # swallow any access error and fall back to the environment.
        try:
            resolved = hf_token.token
        except Exception:
            resolved = None
    resolved = resolved or os.environ.get("HF_TOKEN")
    if not resolved:
        # No token available; InferenceClient accepts anonymous use.
        return InferenceClient(model=MODEL_ID)
    return InferenceClient(token=resolved, model=MODEL_ID)
def extract_token_from_chunk(chunk):
    """Pull the generated text fragment out of one streaming chunk.

    Supports the chunk shapes the InferenceClient stream may emit:
      - object with .choices[0].delta.content
      - dict with choices -> delta -> content
      - dict with choices -> message -> content (older style)
      - dict with top-level 'generated_text' or 'text'

    Returns the fragment, or "" when nothing could be extracted.
    """
    try:
        # Attribute-style chunks (objects produced by the client library).
        attr_choices = getattr(chunk, "choices", None)
        if attr_choices:
            delta = getattr(attr_choices[0], "delta", None)
            if delta:
                return getattr(delta, "content", "") or ""
        # Dict-style chunks.
        if isinstance(chunk, dict):
            # Top-level text fields take precedence.
            for key in ("generated_text", "text"):
                if chunk.get(key):
                    return chunk[key]
            dict_choices = chunk.get("choices") or []
            if dict_choices and isinstance(dict_choices[0], dict):
                first = dict_choices[0]
                delta = first.get("delta")
                if delta:
                    return delta.get("content", "") or ""
                # Older style: message/content.
                message = first.get("message")
                if message and isinstance(message, dict):
                    return message.get("content", "") or ""
        # Unknown shape: nothing to extract.
        return ""
    except Exception:
        # Never let a malformed chunk break the caller's stream loop.
        return ""
def respond(
    message,
    history: list[dict[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    hf_token: gr.OAuthToken,
):
    """
    Handler for gr.ChatInterface. Returns a generator that yields the
    cumulative text (streaming) — compatible with gradio.

    NOTE(review): errors are reported to the user as yielded strings rather
    than raised, so the chat UI never crashes on API failures.
    """
    try:
        client = make_client(hf_token)
    except Exception as e:
        yield f"Erro ao criar InferenceClient: {e}"
        return
    # Build the message list in the expected role/content format.
    messages = [{"role": "system", "content": system_message}] if system_message else []
    # history: depending on the gradio version it may already be in "messages"
    # format; we handle both that and the legacy pair format.
    if history:
        # If history comes as pairs [("user","..."),("assistant","..."), ...], convert.
        if isinstance(history, list) and len(history) and isinstance(history[0], (list, tuple)):
            for u, a in history:
                messages.append({"role": "user", "content": u})
                messages.append({"role": "assistant", "content": a})
        else:
            # Assume history is already a list of role/content dicts or similar.
            for item in history:
                # Keep only well-formed entries; anything else is silently dropped.
                if isinstance(item, dict) and "role" in item and "content" in item:
                    messages.append(item)
    messages.append({"role": "user", "content": message})
    response = ""
    try:
        stream = client.chat_completion(
            messages=messages,
            max_tokens=int(max_tokens),
            stream=True,
            temperature=float(temperature),
            top_p=float(top_p),
        )
    except Exception as e:
        yield f"Erro ao chamar chat_completion: {e}"
        return
    # Iterate over the stream and accumulate the extracted text.
    try:
        for chunk in stream:
            token = extract_token_from_chunk(chunk)
            if token:
                response += token
                yield response
        # When the stream ends with no fragments, fall back to a
        # non-streaming call so the user still gets an answer.
        if response == "":
            try:
                final = client.chat_completion(messages=messages, max_tokens=int(max_tokens), stream=False, temperature=float(temperature), top_p=float(top_p))
                # final may be an object or a dict.
                if hasattr(final, "choices"):
                    # Object style: choices[0].message.content.
                    try:
                        content = final.choices[0].message.content
                    except Exception:
                        content = ""
                elif isinstance(final, dict):
                    # Dict style: generated_text or choices[0].message.content.
                    content = final.get("generated_text", "") or ""
                    if not content:
                        ch = final.get("choices", [])
                        if len(ch) and isinstance(ch[0], dict):
                            msg = ch[0].get("message", {})
                            if isinstance(msg, dict):
                                content = msg.get("content", "") or ""
                else:
                    content = ""
                if content:
                    response += content
                    yield response
            except Exception:
                # Best-effort fallback only; nothing more we can do here.
                pass
    except Exception as e:
        yield f"Erro durante streaming: {e}"
        return
# ChatInterface / UI configuration.
chatbot = gr.ChatInterface(
    respond,
    type="messages",
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
        # The LoginButton supplies an OAuth token once the user signs in
        # to Hugging Face; it arrives as the hf_token argument of respond().
        gr.LoginButton(),
    ],
)

with gr.Blocks() as demo:
    with gr.Sidebar():
        gr.Markdown("Login com Hugging Face para usar o Inference API (recomendado).")
    chatbot.render()

if __name__ == "__main__":
    demo.launch()