# app.py — Opani Coder 1B Gradio demo (Hugging Face Space: michsethowusu)
# Last update: commit dd830d8 (verified)
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
from threading import Thread
# ------------------------------------------------------------------
# 1. Model setup
# ------------------------------------------------------------------
# Hugging Face model id of the merged 16-bit fine-tune. Note that loading
# happens at import time as a module-level side effect (network download
# on first run), so importing this file is expensive.
MODEL_ID = "michsethowusu/opani-coder_1b-merged-16bit"
print("Loading tokenizer…")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
print("Loading model…")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float16,  # half precision to halve weight memory
    device_map="auto",          # let accelerate place weights on GPU/CPU
    low_cpu_mem_usage=True,     # stream weights in to limit peak RAM
    trust_remote_code=True      # executes custom code shipped in the repo, if any
)
print("Model ready!")
# ------------------------------------------------------------------
# 2. Generation helper
# ------------------------------------------------------------------
def generate_response(message: str, history: list[dict], temperature, top_p, top_k, max_tokens):
    """Stream the assistant's reply for *message* given the prior *history*.

    message: the newest user message.
    history: earlier turns as {"role": "user"|"assistant", "content": str} dicts.
    Yields the accumulated reply text after each newly decoded chunk.
    """
    # Render the full conversation through the model's chat template.
    conversation = [*history, {"role": "user", "content": message}]
    prompt_text = tokenizer.apply_chat_template(
        conversation, tokenize=False, add_generation_prompt=True
    )
    model_inputs = tokenizer(prompt_text, return_tensors="pt").to(model.device)

    # The streamer hands back decoded text pieces as generate() produces them;
    # skip_prompt keeps the rendered prompt out of the output.
    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True
    )

    # model.generate() blocks until done, so run it on a worker thread
    # and consume the streamer on this one.
    worker = Thread(
        target=model.generate,
        kwargs={
            **model_inputs,
            "max_new_tokens": max_tokens,
            "temperature": temperature,
            "top_p": top_p,
            "top_k": top_k,
            "do_sample": True,
            "streamer": streamer,
        },
    )
    worker.start()

    reply_so_far = ""
    for piece in streamer:
        reply_so_far += piece
        yield reply_so_far
    worker.join()
# ------------------------------------------------------------------
# 3. Gradio event helpers
# ------------------------------------------------------------------
def user_submit(user_message, history):
    """Clear the textbox and append the user's turn to the chat history.

    Returns ("", new_history) so the textbox empties while the chat shows
    the message immediately; *history* itself is not mutated.
    """
    updated_history = [*history, {"role": "user", "content": user_message}]
    return "", updated_history
def bot_respond(history, temperature, top_p, top_k, max_tokens):
    """Stream the model's reply into the chat, yielding updated history.

    history: messages-format list whose last entry is the pending user turn
    (appended by user_submit). Yields the full history after each chunk so
    Gradio re-renders the chatbot incrementally.
    """
    user_turn = history[-1]["content"]
    history_before = history[:-1]
    # BUG FIX: the original overwrote history[-1] (the user's just-submitted
    # message) with the assistant text, erasing the user turn from the
    # visible chat. Append a new assistant entry instead and update it.
    history = history + [{"role": "assistant", "content": ""}]
    for assistant_text in generate_response(
        user_turn, history_before, temperature, top_p, top_k, max_tokens
    ):
        history[-1] = {"role": "assistant", "content": assistant_text}
        yield history
# ------------------------------------------------------------------
# 4. Gradio UI
# ------------------------------------------------------------------
# ------------------------------------------------------------------
# 4. Gradio UI
# ------------------------------------------------------------------
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
# 🇬🇭 Opani Coder 1B
A fine-tuned Llama 3.2 1B model (16-bit) for coding assistance in Twi.
Ask me anything about programming, and I'll help you out!
"""
    )
    # Chat history rendered from {"role", "content"} dicts —
    # type="messages" matches the format built by the event handlers.
    chatbot = gr.Chatbot(
        height=500,
        label="Chat History",
        type="messages",
        avatar_images=(None, "https://em-content.zobj.net/source/twitter/53/robot-face_1f916.png"),
    )
    with gr.Row():
        msg = gr.Textbox(
            label="Your Message",
            placeholder="Ask me a coding question…",
            scale=4,
            lines=2,
        )
        submit = gr.Button("Send 🚀", scale=1, variant="primary")
    with gr.Accordion("⚙️ Generation Parameters", open=False):
        gr.Markdown("*Adjust these settings to control the response style*")
        # Slider args are (min, max, default, ...).
        temperature = gr.Slider(0.1, 2.0, 0.7, step=0.1, label="Temperature")
        top_p = gr.Slider(0.1, 1.0, 0.9, step=0.05, label="Top P")
        top_k = gr.Slider(1, 100, 20, step=1, label="Top K")
        max_tokens = gr.Slider(64, 2048, 512, step=64, label="Max Tokens")
    clear = gr.Button("🗑️ Clear Chat")
    # ------------------------------------------------------------------
    # 5. Examples
    # ------------------------------------------------------------------
    # Example prompts are in Twi, matching the model's fine-tune language.
    gr.Examples(
        examples=[
            ["Meyɛ dɛn na mekyerɛw Python function?"],
            ["Kyerɛkyerɛ nea for loop yɛ"],
            ["Kyerɛw calculator program a ɛnyɛ den"],
            ["Nsonoe bɛn na ɛda list ne tuple ntam?"],
            ["Boa me ma mensiesie saa code yi mu mfomso"],
        ],
        inputs=msg,
        label="Example Questions"
    )
    # ------------------------------------------------------------------
    # 6. Event wiring
    # ------------------------------------------------------------------
    # Two-step pattern: user_submit echoes the user's message immediately
    # (queue=False skips the queue), then bot_respond streams the reply.
    msg.submit(
        user_submit, [msg, chatbot], [msg, chatbot], queue=False
    ).then(
        bot_respond,
        [chatbot, temperature, top_p, top_k, max_tokens],
        chatbot,
    )
    submit.click(
        user_submit, [msg, chatbot], [msg, chatbot], queue=False
    ).then(
        bot_respond,
        [chatbot, temperature, top_p, top_k, max_tokens],
        chatbot,
    )
    # Returning None resets the Chatbot component to empty.
    clear.click(lambda: None, None, chatbot, queue=False)
    gr.Markdown(
        """
---
### 💡 Tips for Best Results:
- **Factual/Technical questions**: temperature 0.3-0.5
- **Creative coding solutions**: temperature 0.7-1.0
- **Code generation**: temperature 0.5-0.7
### 📝 About This Model
Fine-tuned Llama 3.2 1B (16-bit full model) for coding assistance in Twi.
**Model**: [michsethowusu/opani-coder_1b-merged-16bit](https://huggingface.co/michsethowusu/opani-coder_1b-merged-16bit)
"""
    )
# ------------------------------------------------------------------
# 7. Launch
# ------------------------------------------------------------------
if __name__ == "__main__":
    # queue() is required for the streaming generator handlers to work.
    demo.queue().launch()