Spaces:

gooookim
/

bai_test_02

Paused

App Files Files Community

bai_test_02 / app.py

gooookim

Update app.py

bfb3e24 verified 3 months ago

raw

history blame contribute delete

2.79 kB

	import gradio as gr
	from huggingface_hub import InferenceClient
	from huggingface_hub.utils import HfHubHTTPError


	def respond(
	    message,
	    history: list[dict[str, str]],
	    system_message,
	    max_tokens,
	    temperature,
	    top_p,
	    hf_token_text,  # supplied through the ChatInterface additional_inputs
	):
	    """Stream a chat completion for ``message`` and yield the growing reply.

	    Args:
	        message: The latest user message.
	        history: Earlier conversation turns; they are appended verbatim
	            between the system prompt and the new user message.
	        system_message: Content of the leading ``system`` message.
	        max_tokens: Generation limit; coerced with ``int()``.
	        temperature: Sampling temperature; coerced with ``float()``.
	        top_p: Nucleus-sampling value; coerced with ``float()``.
	        hf_token_text: Hugging Face access token typed in by the user.

	    Yields:
	        The accumulated response text after every non-empty streamed token,
	        or a single ``[오류] ...`` string when the request fails.

	    Raises:
	        gr.Error: If ``hf_token_text`` is empty or only whitespace.
	    """
	    # Validate the token *before* entering the try block. Inside it, the
	    # gr.Error would be caught by the generic ``except Exception`` below and
	    # reported as a misleading "model call failed" chat message.
	    token_value = str(hf_token_text).strip() if hf_token_text else ""
	    if not token_value:
	        raise gr.Error("Hugging Face 토큰을 입력해 주세요. (hf_...)")

	    try:
	        client = InferenceClient(token=token_value)

	        messages = [{"role": "system", "content": system_message}]
	        messages.extend(history)
	        messages.append({"role": "user", "content": message})

	        stream = client.chat.completions.create(
	            model="openai/gpt-oss-20b",  # switched to the 20B model
	            messages=messages,
	            max_tokens=int(max_tokens),
	            stream=True,
	            temperature=float(temperature),
	            top_p=float(top_p),
	        )

	        response = ""
	        for chunk in stream:
	            # Skip chunks that carry no choices instead of swallowing
	            # arbitrary exceptions around the attribute access.
	            choices = getattr(chunk, "choices", None)
	            if not choices:
	                continue
	            delta = getattr(choices[0], "delta", None)
	            token = getattr(delta, "content", None) or ""

	            if token:
	                response += token
	                yield response

	    except HfHubHTTPError as e:
	        yield f"[오류] Hugging Face 요청 실패: {e}"
	    except Exception as e:
	        # Boundary handler: surface any other failure in the chat window
	        # rather than crashing the event handler.
	        yield f"[오류] 모델 호출 실패: {type(e).__name__}: {e}"


	# Build the UI: a collapsed settings accordion followed by the chat interface.
	# NOTE(review): the original nesting was lost in extraction; this layout
	# assumes every settings widget (including the token box) lives inside the
	# accordion and the ChatInterface sits directly under Blocks — confirm.
	with gr.Blocks() as demo:
	    with gr.Accordion("설정", open=False):
	        system_message = gr.Textbox(
	            value="You are a friendly Chatbot.",
	            label="System message"
	        )

	        # For the 20B model: reduced output-token cap (realistic / stable)
	        max_tokens = gr.Slider(
	            minimum=1,
	            maximum=4096, # upper bound suited to the 20B model
	            value=1024,
	            step=1,
	            label="Max new tokens"
	        )

	        # Keep the default at 1.0 (stable)
	        temperature = gr.Slider(
	            minimum=0.0,
	            maximum=2.0,
	            value=1.0,
	            step=0.05,
	            label="Temperature"
	        )

	        # Keep the default at 1.0
	        top_p = gr.Slider(
	            minimum=0.1,
	            maximum=1.0,
	            value=1.0,
	            step=0.05,
	            label="Top-p (nucleus sampling)"
	        )

	        # Password-type textbox, so the token is masked while typing.
	        hf_token_text = gr.Textbox(
	            label="HF Token (hf_...)",
	            type="password"
	        )

	    # The widgets above are listed in the same order as the parameters that
	    # follow ``message`` and ``history`` in ``respond``.
	    chatbot = gr.ChatInterface(
	        respond,
	        type="messages",
	        additional_inputs=[
	            system_message,
	            max_tokens,
	            temperature,
	            top_p,
	            hf_token_text,
	        ],
	    )

	# Launch the app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()