# (Hugging Face Spaces page residue from the paste: "Spaces / Sleeping / Sleeping")
# app.py — corrected and more robust version
import os

import gradio as gr
from huggingface_hub import InferenceClient

# Model served through the HF Inference API; adjust if needed.
MODEL_ID = "mradermacher/sk2decompile-struct-6.7b-GGUF"
def make_client(hf_token):
    """Build an InferenceClient for MODEL_ID.

    Token resolution order:
      1) token supplied by gr.LoginButton (``hf_token.token``)
      2) the ``HF_TOKEN`` environment variable (useful as a Space Secret)
      3) no token (anonymous) — may fail depending on the model

    Args:
        hf_token: OAuth-token-like object exposing a ``.token`` attribute,
            or None when the user is not logged in.

    Returns:
        InferenceClient bound to MODEL_ID, authenticated when a token is
        available.
    """
    # getattr with a default replaces the original broad try/except:
    # hf_token may be None or may lack a ``.token`` attribute.
    token = getattr(hf_token, "token", None) if hf_token else None
    if not token:
        token = os.environ.get("HF_TOKEN")
    # InferenceClient accepts token=None (anonymous access), so the two
    # construction branches collapse into one call.
    return InferenceClient(model=MODEL_ID, token=token)
def extract_token_from_chunk(chunk):
    """Extract the generated text fragment from a streaming chunk.

    Supports the shapes the InferenceClient stream may emit:
      - object with ``.choices[0].delta.content``
      - dict with ``choices -> delta -> content``
      - dict with ``choices -> message -> content`` (older style)
      - dict with top-level ``generated_text`` or ``text``

    Returns:
        str: the extracted fragment, or "" when nothing could be extracted.
    """
    try:
        # Attribute-style chunk (object returned by huggingface_hub).
        if hasattr(chunk, "choices"):
            choices = chunk.choices
            if choices:
                delta = getattr(choices[0], "delta", None)
                if delta:
                    return getattr(delta, "content", "") or ""
        # Dict-style chunk.
        if isinstance(chunk, dict):
            # Top-level text fields take precedence over choices.
            if chunk.get("generated_text"):
                return chunk["generated_text"]
            if chunk.get("text"):
                return chunk["text"]
            choices = chunk.get("choices") or []
            if choices:
                first = choices[0]
                if isinstance(first, dict):
                    delta = first.get("delta")
                    if delta:
                        return delta.get("content", "") or ""
                    # Older style: message/content.
                    msg = first.get("message")
                    if isinstance(msg, dict):
                        return msg.get("content", "") or ""
        # Unknown shape: empty fallback.
        return ""
    except Exception:
        # Best-effort extraction: any unexpected structure yields "".
        return ""
def _history_to_messages(history):
    """Normalize gradio chat history into role/content message dicts.

    Accepts either the legacy pair format ``[(user, assistant), ...]`` or
    the modern ``[{"role": ..., "content": ...}, ...]`` format; anything
    else is ignored entry by entry.
    """
    messages = []
    if not history:
        return messages
    if isinstance(history, list) and history and isinstance(history[0], (list, tuple)):
        # Legacy pair format: expand each pair into two messages.
        for user_text, assistant_text in history:
            messages.append({"role": "user", "content": user_text})
            messages.append({"role": "assistant", "content": assistant_text})
    else:
        # Assume role/content dicts; keep only well-formed entries.
        for item in history:
            if isinstance(item, dict) and "role" in item and "content" in item:
                messages.append(item)
    return messages


def _final_content(final):
    """Pull the assistant text out of a non-streaming chat_completion result.

    Handles object-style (``final.choices[0].message.content``) and
    dict-style responses; returns "" when nothing usable is found.
    """
    if hasattr(final, "choices"):
        try:
            return final.choices[0].message.content or ""
        except Exception:
            return ""
    if isinstance(final, dict):
        content = final.get("generated_text", "") or ""
        if not content:
            choices = final.get("choices", [])
            if choices and isinstance(choices[0], dict):
                msg = choices[0].get("message", {})
                if isinstance(msg, dict):
                    content = msg.get("content", "") or ""
        return content
    return ""


def respond(
    message,
    history: list[dict[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    hf_token: gr.OAuthToken,
):
    """Handler for gr.ChatInterface.

    Yields the cumulative response text (streaming), which is the shape
    gradio expects from a generator handler. Errors are reported by
    yielding a message rather than raising, so the UI shows them.

    Args:
        message: current user message.
        history: prior turns (pairs or role/content dicts).
        system_message: optional system prompt.
        max_tokens / temperature / top_p: generation parameters.
        hf_token: OAuth token injected by gradio's LoginButton (the
            ``gr.OAuthToken`` annotation triggers the injection).
    """
    try:
        client = make_client(hf_token)
    except Exception as e:
        yield f"Erro ao criar InferenceClient: {e}"
        return

    # Assemble messages in the role/content format expected by the API.
    messages = [{"role": "system", "content": system_message}] if system_message else []
    messages.extend(_history_to_messages(history))
    messages.append({"role": "user", "content": message})

    response = ""
    try:
        stream = client.chat_completion(
            messages=messages,
            max_tokens=int(max_tokens),
            stream=True,
            temperature=float(temperature),
            top_p=float(top_p),
        )
    except Exception as e:
        yield f"Erro ao chamar chat_completion: {e}"
        return

    # Iterate the stream, accumulating and re-yielding the text so far.
    try:
        for chunk in stream:
            fragment = extract_token_from_chunk(chunk)
            if fragment:
                response += fragment
                yield response
        # If the stream produced no fragments, retry once without streaming.
        if response == "":
            try:
                final = client.chat_completion(
                    messages=messages,
                    max_tokens=int(max_tokens),
                    stream=False,
                    temperature=float(temperature),
                    top_p=float(top_p),
                )
                content = _final_content(final)
                if content:
                    response += content
                    yield response
            except Exception:
                # Best-effort fallback; nothing more to do.
                pass
    except Exception as e:
        yield f"Erro durante streaming: {e}"
        return
# ChatInterface configuration / UI.
chatbot = gr.ChatInterface(
    respond,
    type="messages",
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
        # The LoginButton provides an OAuth token once the user signs in
        # to Hugging Face.
        gr.LoginButton(),
    ],
)
# Wrap the chat UI in a Blocks layout with a sidebar login hint.
with gr.Blocks() as demo:
    with gr.Sidebar():
        gr.Markdown("Login com Hugging Face para usar o Inference API (recomendado).")
    chatbot.render()

if __name__ == "__main__":
    demo.launch()