# (Hugging Face Spaces page residue from the paste: "Spaces / Sleeping / Sleeping")
# app.py — corrected and more robust version
import os

import gradio as gr
from huggingface_hub import InferenceClient

# Model served through the HF Inference API; adjust if needed.
MODEL_ID = "mradermacher/sk2decompile-struct-6.7b-GGUF"
def make_client(hf_token):
    """Build an InferenceClient for MODEL_ID.

    Token resolution order:
      1) token supplied by gr.LoginButton (``hf_token.token``)
      2) the ``HF_TOKEN`` environment variable (useful as a Space Secret)
      3) no token (anonymous) — may fail depending on the model

    Args:
        hf_token: OAuth-token-like object exposing a ``.token`` attribute,
            or None when the user is not logged in.

    Returns:
        InferenceClient bound to MODEL_ID, authenticated when a token is
        available.
    """
    # getattr with a default replaces the original broad try/except:
    # hf_token may be None or may lack a ``.token`` attribute.
    token = getattr(hf_token, "token", None) if hf_token else None
    if not token:
        token = os.environ.get("HF_TOKEN")
    # InferenceClient accepts token=None (anonymous access), so the two
    # construction branches collapse into one call.
    return InferenceClient(model=MODEL_ID, token=token)
def extract_token_from_chunk(chunk):
    """Extract the generated text fragment from a streaming chunk.

    Supports the shapes the InferenceClient stream may emit:
      - object with ``.choices[0].delta.content``
      - dict with ``choices -> delta -> content``
      - dict with ``choices -> message -> content`` (older style)
      - dict with top-level ``generated_text`` or ``text``

    Returns:
        str: the extracted fragment, or "" when nothing could be extracted.
    """
    try:
        # Attribute-style chunk (object returned by huggingface_hub).
        if hasattr(chunk, "choices"):
            choices = chunk.choices
            if choices:
                delta = getattr(choices[0], "delta", None)
                if delta:
                    return getattr(delta, "content", "") or ""
        # Dict-style chunk.
        if isinstance(chunk, dict):
            # Top-level text fields take precedence over choices.
            if chunk.get("generated_text"):
                return chunk["generated_text"]
            if chunk.get("text"):
                return chunk["text"]
            choices = chunk.get("choices") or []
            if choices:
                first = choices[0]
                if isinstance(first, dict):
                    delta = first.get("delta")
                    if delta:
                        return delta.get("content", "") or ""
                    # Older style: message/content.
                    msg = first.get("message")
                    if isinstance(msg, dict):
                        return msg.get("content", "") or ""
        # Unknown shape: empty fallback.
        return ""
    except Exception:
        # Best-effort extraction: any unexpected structure yields "".
        return ""
def _history_to_messages(history):
    """Normalize gradio chat history into role/content message dicts.

    Accepts either the legacy pair format ``[(user, assistant), ...]`` or
    the modern ``[{"role": ..., "content": ...}, ...]`` format; anything
    else is ignored entry by entry.
    """
    messages = []
    if not history:
        return messages
    if isinstance(history, list) and history and isinstance(history[0], (list, tuple)):
        # Legacy pair format: expand each pair into two messages.
        for user_text, assistant_text in history:
            messages.append({"role": "user", "content": user_text})
            messages.append({"role": "assistant", "content": assistant_text})
    else:
        # Assume role/content dicts; keep only well-formed entries.
        for item in history:
            if isinstance(item, dict) and "role" in item and "content" in item:
                messages.append(item)
    return messages


def _final_content(final):
    """Pull the assistant text out of a non-streaming chat_completion result.

    Handles object-style (``final.choices[0].message.content``) and
    dict-style responses; returns "" when nothing usable is found.
    """
    if hasattr(final, "choices"):
        try:
            return final.choices[0].message.content or ""
        except Exception:
            return ""
    if isinstance(final, dict):
        content = final.get("generated_text", "") or ""
        if not content:
            choices = final.get("choices", [])
            if choices and isinstance(choices[0], dict):
                msg = choices[0].get("message", {})
                if isinstance(msg, dict):
                    content = msg.get("content", "") or ""
        return content
    return ""


def respond(
    message,
    history: list[dict[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    hf_token: gr.OAuthToken,
):
    """Handler for gr.ChatInterface.

    Yields the cumulative response text (streaming), which is the shape
    gradio expects from a generator handler. Errors are reported by
    yielding a message rather than raising, so the UI shows them.

    Args:
        message: current user message.
        history: prior turns (pairs or role/content dicts).
        system_message: optional system prompt.
        max_tokens / temperature / top_p: generation parameters.
        hf_token: OAuth token injected by gradio's LoginButton (the
            ``gr.OAuthToken`` annotation triggers the injection).
    """
    try:
        client = make_client(hf_token)
    except Exception as e:
        yield f"Erro ao criar InferenceClient: {e}"
        return

    # Assemble messages in the role/content format expected by the API.
    messages = [{"role": "system", "content": system_message}] if system_message else []
    messages.extend(_history_to_messages(history))
    messages.append({"role": "user", "content": message})

    response = ""
    try:
        stream = client.chat_completion(
            messages=messages,
            max_tokens=int(max_tokens),
            stream=True,
            temperature=float(temperature),
            top_p=float(top_p),
        )
    except Exception as e:
        yield f"Erro ao chamar chat_completion: {e}"
        return

    # Iterate the stream, accumulating and re-yielding the text so far.
    try:
        for chunk in stream:
            fragment = extract_token_from_chunk(chunk)
            if fragment:
                response += fragment
                yield response
        # If the stream produced no fragments, retry once without streaming.
        if response == "":
            try:
                final = client.chat_completion(
                    messages=messages,
                    max_tokens=int(max_tokens),
                    stream=False,
                    temperature=float(temperature),
                    top_p=float(top_p),
                )
                content = _final_content(final)
                if content:
                    response += content
                    yield response
            except Exception:
                # Best-effort fallback; nothing more to do.
                pass
    except Exception as e:
        yield f"Erro durante streaming: {e}"
        return
# ChatInterface configuration / UI.
chatbot = gr.ChatInterface(
    respond,
    type="messages",
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
        # The LoginButton provides an OAuth token once the user signs in
        # to Hugging Face.
        gr.LoginButton(),
    ],
)
# Wrap the chat UI in a Blocks layout with a sidebar login hint.
with gr.Blocks() as demo:
    with gr.Sidebar():
        gr.Markdown("Login com Hugging Face para usar o Inference API (recomendado).")
    chatbot.render()

if __name__ == "__main__":
    demo.launch()