# app.py — Opani Coder 1B Gradio demo (Hugging Face Space: michsethowusu)
# Last update: commit dd830d8 (verified)
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
from threading import Thread
# ------------------------------------------------------------------
# 1. Model setup
# ------------------------------------------------------------------
# Hugging Face model id of the merged 16-bit fine-tune. Note that loading
# happens at import time as a module-level side effect (network download
# on first run), so importing this file is expensive.
MODEL_ID = "michsethowusu/opani-coder_1b-merged-16bit"
print("Loading tokenizer…")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
print("Loading model…")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float16,  # half precision to halve weight memory
    device_map="auto",          # let accelerate place weights on GPU/CPU
    low_cpu_mem_usage=True,     # stream weights in to limit peak RAM
    trust_remote_code=True      # executes custom code shipped in the repo, if any
)
print("Model ready!")
# ------------------------------------------------------------------
# 2. Generation helper
# ------------------------------------------------------------------
def generate_response(message: str, history: list[dict], temperature, top_p, top_k, max_tokens):
    """Stream the assistant's reply for *message* given the prior *history*.

    message: the newest user message.
    history: earlier turns as {"role": "user"|"assistant", "content": str} dicts.
    Yields the accumulated reply text after each newly decoded chunk.
    """
    # Render the full conversation through the model's chat template.
    conversation = [*history, {"role": "user", "content": message}]
    prompt_text = tokenizer.apply_chat_template(
        conversation, tokenize=False, add_generation_prompt=True
    )
    model_inputs = tokenizer(prompt_text, return_tensors="pt").to(model.device)

    # The streamer hands back decoded text pieces as generate() produces them;
    # skip_prompt keeps the rendered prompt out of the output.
    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True
    )

    # model.generate() blocks until done, so run it on a worker thread
    # and consume the streamer on this one.
    worker = Thread(
        target=model.generate,
        kwargs={
            **model_inputs,
            "max_new_tokens": max_tokens,
            "temperature": temperature,
            "top_p": top_p,
            "top_k": top_k,
            "do_sample": True,
            "streamer": streamer,
        },
    )
    worker.start()

    reply_so_far = ""
    for piece in streamer:
        reply_so_far += piece
        yield reply_so_far
    worker.join()
# ------------------------------------------------------------------
# 3. Gradio event helpers
# ------------------------------------------------------------------
def user_submit(user_message, history):
    """Clear the textbox and append the user's turn to the chat history.

    Returns ("", new_history) so the textbox empties while the chat shows
    the message immediately; *history* itself is not mutated.
    """
    updated_history = [*history, {"role": "user", "content": user_message}]
    return "", updated_history
def bot_respond(history, temperature, top_p, top_k, max_tokens):
    """Stream the model's reply into the chat, yielding updated history.

    history: messages-format list whose last entry is the pending user turn
    (appended by user_submit). Yields the full history after each chunk so
    Gradio re-renders the chatbot incrementally.
    """
    user_turn = history[-1]["content"]
    history_before = history[:-1]
    # BUG FIX: the original overwrote history[-1] (the user's just-submitted
    # message) with the assistant text, erasing the user turn from the
    # visible chat. Append a new assistant entry instead and update it.
    history = history + [{"role": "assistant", "content": ""}]
    for assistant_text in generate_response(
        user_turn, history_before, temperature, top_p, top_k, max_tokens
    ):
        history[-1] = {"role": "assistant", "content": assistant_text}
        yield history
# ------------------------------------------------------------------
# 4. Gradio UI
# ------------------------------------------------------------------
# ------------------------------------------------------------------
# 4. Gradio UI
# ------------------------------------------------------------------
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
# 🇬🇭 Opani Coder 1B
A fine-tuned Llama 3.2 1B model (16-bit) for coding assistance in Twi.
Ask me anything about programming, and I'll help you out!
"""
    )
    # Chat history rendered from {"role", "content"} dicts —
    # type="messages" matches the format built by the event handlers.
    chatbot = gr.Chatbot(
        height=500,
        label="Chat History",
        type="messages",
        avatar_images=(None, "https://em-content.zobj.net/source/twitter/53/robot-face_1f916.png"),
    )
    with gr.Row():
        msg = gr.Textbox(
            label="Your Message",
            placeholder="Ask me a coding question…",
            scale=4,
            lines=2,
        )
        submit = gr.Button("Send 🚀", scale=1, variant="primary")
    with gr.Accordion("⚙️ Generation Parameters", open=False):
        gr.Markdown("*Adjust these settings to control the response style*")
        # Slider args are (min, max, default, ...).
        temperature = gr.Slider(0.1, 2.0, 0.7, step=0.1, label="Temperature")
        top_p = gr.Slider(0.1, 1.0, 0.9, step=0.05, label="Top P")
        top_k = gr.Slider(1, 100, 20, step=1, label="Top K")
        max_tokens = gr.Slider(64, 2048, 512, step=64, label="Max Tokens")
    clear = gr.Button("🗑️ Clear Chat")
    # ------------------------------------------------------------------
    # 5. Examples
    # ------------------------------------------------------------------
    # Example prompts are in Twi, matching the model's fine-tune language.
    gr.Examples(
        examples=[
            ["Meyɛ dɛn na mekyerɛw Python function?"],
            ["Kyerɛkyerɛ nea for loop yɛ"],
            ["Kyerɛw calculator program a ɛnyɛ den"],
            ["Nsonoe bɛn na ɛda list ne tuple ntam?"],
            ["Boa me ma mensiesie saa code yi mu mfomso"],
        ],
        inputs=msg,
        label="Example Questions"
    )
    # ------------------------------------------------------------------
    # 6. Event wiring
    # ------------------------------------------------------------------
    # Two-step pattern: user_submit echoes the user's message immediately
    # (queue=False skips the queue), then bot_respond streams the reply.
    msg.submit(
        user_submit, [msg, chatbot], [msg, chatbot], queue=False
    ).then(
        bot_respond,
        [chatbot, temperature, top_p, top_k, max_tokens],
        chatbot,
    )
    submit.click(
        user_submit, [msg, chatbot], [msg, chatbot], queue=False
    ).then(
        bot_respond,
        [chatbot, temperature, top_p, top_k, max_tokens],
        chatbot,
    )
    # Returning None resets the Chatbot component to empty.
    clear.click(lambda: None, None, chatbot, queue=False)
    gr.Markdown(
        """
---
### 💡 Tips for Best Results:
- **Factual/Technical questions**: temperature 0.3-0.5
- **Creative coding solutions**: temperature 0.7-1.0
- **Code generation**: temperature 0.5-0.7
### 📝 About This Model
Fine-tuned Llama 3.2 1B (16-bit full model) for coding assistance in Twi.
**Model**: [michsethowusu/opani-coder_1b-merged-16bit](https://huggingface.co/michsethowusu/opani-coder_1b-merged-16bit)
"""
    )
# ------------------------------------------------------------------
# 7. Launch
# ------------------------------------------------------------------
if __name__ == "__main__":
    # queue() is required for the streaming generator handlers to work.
    demo.queue().launch()